

Java ParquetOutputFormat Class Code Examples

This article collects typical usage examples of the Java class parquet.hadoop.ParquetOutputFormat, drawn from open-source projects. If you are wondering what ParquetOutputFormat is for or how to use it, the curated examples below should help.


The ParquetOutputFormat class belongs to the parquet.hadoop package. The sections below show 15 code examples of the class, ordered by popularity.
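All of the examples share the same basic pattern: ParquetOutputFormat exposes static setters that write Parquet tuning options into a Hadoop job's configuration. Here is a minimal sketch of that pattern; MyWriteSupport is a hypothetical placeholder for a concrete WriteSupport implementation (the Avro/Thrift/Protobuf subclasses in the examples provide their own), and the values shown are illustrative only:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import parquet.hadoop.ParquetOutputFormat;
import parquet.hadoop.metadata.CompressionCodecName;

public static Job configureParquetJob(String outputPath) throws Exception {
	Job job = Job.getInstance();
	job.setOutputFormatClass(ParquetOutputFormat.class);
	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	// MyWriteSupport is hypothetical; it maps records to Parquet's data model.
	ParquetOutputFormat.setWriteSupportClass(job, MyWriteSupport.class);

	// The tuning knobs used throughout the examples below.
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);
	ParquetOutputFormat.setBlockSize(job, 128 * 1024 * 1024); // row group size in bytes
	return job;
}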

Example 1: writeAvro

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Set up the Hadoop job
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 17, Source: ParquetAvroExample.java
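For context, a hypothetical Flink driver for writeAvro could look like the sketch below. It assumes a Flink 1.x API with the flink-hadoop-compatibility module on the classpath, and an Avro-generated Person record with name and age fields (neither is shown in this article). The same driver shape applies to the Thrift and Protobuf variants in the examples that follow.

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Person is assumed to be an Avro-generated class with a builder.
	Person alice = Person.newBuilder().setName("Alice").setAge(30).build();

	// The output format expects (key, value) pairs with a Void key.
	DataSet<Tuple2<Void, Person>> persons = env
		.fromElements(alice)
		.map(p -> new Tuple2<Void, Person>(null, p))
		.returns(new TypeHint<Tuple2<Void, Person>>() {});

	writeAvro(persons, "file:///tmp/persons-parquet");
	env.execute("write avro to parquet");
}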

Example 2: writeThrift

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Set up the Hadoop job
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	ParquetThriftOutputFormat.setThriftClass(job, Person.class);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 18, Source: ParquetThriftExample.java

Example 3: newPhysicalProperties

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static Options newPhysicalProperties(CatalogProtos.StoreType type) {
  Options options = new Options();
  if (CatalogProtos.StoreType.CSV == type) {
    options.put(CSVFILE_DELIMITER, DEFAULT_FIELD_DELIMITER);
  } else if (CatalogProtos.StoreType.RCFILE == type) {
    options.put(RCFILE_SERDE, DEFAULT_BINARY_SERDE);
  } else if (CatalogProtos.StoreType.SEQUENCEFILE == type) {
    options.put(SEQUENCEFILE_SERDE, DEFAULT_TEXT_SERDE);
    options.put(SEQUENCEFILE_DELIMITER, DEFAULT_FIELD_DELIMITER);
  } else if (type == CatalogProtos.StoreType.PARQUET) {
    options.put(ParquetOutputFormat.BLOCK_SIZE, PARQUET_DEFAULT_BLOCK_SIZE);
    options.put(ParquetOutputFormat.PAGE_SIZE, PARQUET_DEFAULT_PAGE_SIZE);
    options.put(ParquetOutputFormat.COMPRESSION, PARQUET_DEFAULT_COMPRESSION_CODEC_NAME);
    options.put(ParquetOutputFormat.ENABLE_DICTIONARY, PARQUET_DEFAULT_IS_DICTIONARY_ENABLED);
    options.put(ParquetOutputFormat.VALIDATION, PARQUET_DEFAULT_IS_VALIDATION_ENABLED);
  }

  return options;
}
 
Developer: gruter, Project: tajo-cdh, Lines: 20, Source: StorageUtil.java
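The constants referenced above (BLOCK_SIZE, PAGE_SIZE, COMPRESSION, ENABLE_DICTIONARY, VALIDATION) are plain String keys into the Hadoop Configuration, so the same settings can also be applied directly to a Configuration object. A minimal sketch, assuming stock parquet-mr constants and illustrative values:

import org.apache.hadoop.conf.Configuration;
import parquet.hadoop.ParquetOutputFormat;
import parquet.hadoop.metadata.CompressionCodecName;

Configuration conf = new Configuration();
conf.setInt(ParquetOutputFormat.BLOCK_SIZE, 128 * 1024 * 1024); // row group size in bytes
conf.setInt(ParquetOutputFormat.PAGE_SIZE, 1024 * 1024);        // page size in bytes
conf.set(ParquetOutputFormat.COMPRESSION, CompressionCodecName.SNAPPY.name());
conf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);
conf.setBoolean(ParquetOutputFormat.VALIDATION, false);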

Example 4: setVersion

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
@Override
public void setVersion( VERSION version ) throws Exception {
  inClassloader( () -> {
    ParquetProperties.WriterVersion writerVersion;
    switch ( version ) {
      case VERSION_1_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_1_0;
        break;
      case VERSION_2_0:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
      default:
        writerVersion = ParquetProperties.WriterVersion.PARQUET_2_0;
        break;
    }
    job.getConfiguration().set( ParquetOutputFormat.WRITER_VERSION, writerVersion.toString() );
  } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 19, Source: PentahoParquetOutputFormat.java

Example 5: setCompression

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
@Override
public void setCompression( COMPRESSION comp ) throws Exception {
  inClassloader( () -> {
    CompressionCodecName codec;
    switch ( comp ) {
      case SNAPPY:
        codec = CompressionCodecName.SNAPPY;
        break;
      case GZIP:
        codec = CompressionCodecName.GZIP;
        break;
      case LZO:
        codec = CompressionCodecName.LZO;
        break;
      default:
        codec = CompressionCodecName.UNCOMPRESSED;
        break;
    }
    ParquetOutputFormat.setCompression( job, codec );
  } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 22, Source: PentahoParquetOutputFormat.java

Example 6: RecordWriterWrapper

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public RecordWriterWrapper(ParquetOutputFormat<V> realOutputFormat,
                           FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {

    CompressionCodecName codec = getCodec(conf);
    String extension = codec.getExtension() + ".parquet";
    Path file = getDefaultWorkFile(conf, name, extension);

    try {
        realWriter = (ParquetRecordWriter<V>) realOutputFormat.getRecordWriter(conf, file, codec);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // restore the interrupt flag instead of clearing it
        throw new IOException(e);
    }
}
 
Developer: grokcoder, Project: pbase, Lines: 15, Source: DeprecatedParquetOutputFormat.java

Example 7: shouldUseParquetFlagToSetCodec

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public void shouldUseParquetFlagToSetCodec(String codecNameStr, CompressionCodecName expectedCodec) throws IOException {

    // Test the mapreduce API
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    conf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    TaskAttemptContext task = ContextUtil.newTaskAttemptContext(conf, new TaskAttemptID(new TaskID(new JobID("test", 1), false, 1), 1));
    Assert.assertEquals(CodecConfig.from(task).getCodec(), expectedCodec);

    // Test the mapred API
    JobConf jobConf = new JobConf();
    jobConf.set(ParquetOutputFormat.COMPRESSION, codecNameStr);
    Assert.assertEquals(CodecConfig.from(jobConf).getCodec(), expectedCodec);
}
 
Developer: grokcoder, Project: pbase, Lines: 15, Source: CodecConfigTest.java
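Note that shouldUseParquetFlagToSetCodec is a parameterized helper rather than a standalone test. A hypothetical caller might exercise it as follows, assuming CodecConfig accepts the CompressionCodecName enum names as codec strings:

@Test
public void testCodecSelection() throws IOException {
	shouldUseParquetFlagToSetCodec("SNAPPY", CompressionCodecName.SNAPPY);
	shouldUseParquetFlagToSetCodec("GZIP", CompressionCodecName.GZIP);
	shouldUseParquetFlagToSetCodec("UNCOMPRESSED", CompressionCodecName.UNCOMPRESSED);
}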

Example 8: writeProtobuf

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static void writeProtobuf(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ProtoParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	ProtoParquetOutputFormat.setProtobufClass(job, Person.class);
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Developer: FelixNeutatz, Project: parquet-flinktacular, Lines: 16, Source: ParquetProtobufExample.java

Example 9: ParquetAppender

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
/**
 * Creates a new ParquetAppender.
 *
 * @param conf Configuration properties.
 * @param schema The table schema.
 * @param meta The table metadata.
 * @param path The path of the Parquet file to write to.
 */
public ParquetAppender(Configuration conf, Schema schema, TableMeta meta,
                       Path path) throws IOException {
  super(conf, schema, meta, path);
  this.blockSize = Integer.parseInt(
      meta.getOption(ParquetOutputFormat.BLOCK_SIZE));
  this.pageSize = Integer.parseInt(
      meta.getOption(ParquetOutputFormat.PAGE_SIZE));
  this.compressionCodecName = CompressionCodecName.fromConf(
      meta.getOption(ParquetOutputFormat.COMPRESSION));
  this.enableDictionary = Boolean.parseBoolean(
      meta.getOption(ParquetOutputFormat.ENABLE_DICTIONARY));
  this.validating = Boolean.parseBoolean(
      meta.getOption(ParquetOutputFormat.VALIDATION));
}
 
Developer: gruter, Project: tajo-cdh, Lines: 23, Source: ParquetAppender.java

Example 10: PentahoParquetOutputFormat

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public PentahoParquetOutputFormat() throws Exception {
  logger.info( "We are initializing parquet output format" );

  inClassloader( () -> {
    ConfigurationProxy conf = new ConfigurationProxy();

    job = Job.getInstance( conf );

    job.getConfiguration().set( ParquetOutputFormat.ENABLE_JOB_SUMMARY, "false" );
    ParquetOutputFormat.setEnableDictionary( job, false );
  } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 13, Source: PentahoParquetOutputFormat.java

Example 11: setOutputFile

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
@Override
public void setOutputFile( String file, boolean override ) throws Exception {
  inClassloader( () -> {
    outputFile = new Path( file );
    FileSystem fs = FileSystem.get( outputFile.toUri(), job.getConfiguration() );
    if ( fs.exists( outputFile ) ) {
      if ( override ) {
        fs.delete( outputFile, true );
      } else {
        throw new FileAlreadyExistsException( file );
      }
    }
    ParquetOutputFormat.setOutputPath( job, outputFile.getParent() );
  } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 16, Source: PentahoParquetOutputFormat.java

Example 12: recordWriterCreateFileWithData

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
@Test
public void recordWriterCreateFileWithData() throws Exception {

  WriteSupport support =
    new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );

  ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );

  ParquetRecordWriter<RowMetaAndData> recordWriter =
    (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );

  PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );

  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { "Alex", "87" } );

  writer.write( row );
  recordWriter.close( task );

  Files.walk( Paths.get( tempFile.toString() ) )
    .filter( Files::isRegularFile )
    .forEach( ( f ) -> {
      String file = f.toString();
      if ( file.endsWith( "parquet" ) ) {
        IPentahoInputFormat.IPentahoRecordReader recordReader =
          readCreatedParquetFile( Paths.get( file ).toUri().toString() );
        recordReader.forEach(
          rowMetaAndData -> Assert.assertTrue( rowMetaAndData.size() == 2 ) );
      }
    } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 37, Source: PentahoParquetRecordWriterTest.java

Example 13: recordWriterCreateFileWithoutData

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
@Test
public void recordWriterCreateFileWithoutData() throws Exception {

  WriteSupport support =
    new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );

  ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );

  ParquetRecordWriter<RowMetaAndData> recordWriter =
    (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );

  PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );

  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { null, null } );

  writer.write( row );
  recordWriter.close( task );

  Files.walk( Paths.get( tempFile.toString() ) )
    .filter( Files::isRegularFile )
    .forEach( ( f ) -> {
      String file = f.toString();
      if ( file.endsWith( "parquet" ) ) {
        try {
          Assert.assertTrue( Files.size( Paths.get( file ) ) > 0 );
        } catch ( IOException e ) {
          throw new UncheckedIOException( e ); // fail the test instead of swallowing the error
        }
      }
    } );
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 38, Source: PentahoParquetRecordWriterTest.java

Example 14: setWriteSupportClass

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static void setWriteSupportClass(Configuration configuration, Class<?> writeSupportClass) {
    configuration.set(ParquetOutputFormat.WRITE_SUPPORT_CLASS, writeSupportClass.getName());
}
 
Developer: grokcoder, Project: pbase, Lines: 4, Source: DeprecatedParquetOutputFormat.java

Example 15: setBlockSize

import parquet.hadoop.ParquetOutputFormat; // import the required package/class
public static void setBlockSize(Configuration configuration, int blockSize) {
    configuration.setInt(ParquetOutputFormat.BLOCK_SIZE, blockSize);
}
 
Developer: grokcoder, Project: pbase, Lines: 4, Source: DeprecatedParquetOutputFormat.java
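Taken together, Examples 14 and 15 suggest a mapred-era job setup along the following lines. This is only a sketch: MyWriteSupport is a hypothetical WriteSupport implementation, and the two helpers are the static setters shown above.

import org.apache.hadoop.mapred.JobConf;

JobConf jobConf = new JobConf();
// JobConf extends Configuration, so the static helpers apply directly.
DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, MyWriteSupport.class);
DeprecatedParquetOutputFormat.setBlockSize(jobConf, 64 * 1024 * 1024); // 64 MB row groups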


Note: The parquet.hadoop.ParquetOutputFormat examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community-contributed open-source projects; copyright remains with the original authors, and distribution and use should follow each project's license. Please do not repost without permission.