Java CompressionCodecName Class Code Examples

This article collects typical usage examples of the Java class parquet.hadoop.metadata.CompressionCodecName. If you are wondering what the CompressionCodecName class is for, or how to use it, the curated examples below should help.


The CompressionCodecName class belongs to the parquet.hadoop.metadata package. Fifteen code examples of the class are shown below, sorted by popularity by default.
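Before working through the examples, here is a minimal sketch of the enum itself. It is an illustrative assumption rather than one of the collected examples, but it only uses methods the examples below also call, plus fromConf, which parquet-hadoop 1.x provides:

import parquet.hadoop.metadata.CompressionCodecName;

public class CodecTour {
    public static void main(String[] args) {
        // Each enum constant knows its Hadoop codec class and file extension
        // (the class name is null for UNCOMPRESSED).
        for (CompressionCodecName codec : CompressionCodecName.values()) {
            System.out.printf("%-13s ext=%-9s hadoopClass=%s%n",
                    codec,
                    codec.getExtension(),
                    codec.getHadoopCompressionCodecClassName());
        }
        // fromConf maps a configuration string to a constant; null yields UNCOMPRESSED.
        System.out.println("parsed: " + CompressionCodecName.fromConf("SNAPPY"));
    }
}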

Example 1: build

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public PFileWriter build() {
    try {
        this.parquetWriter = new ParquetWriter<Group>(
                file,                        // destination file path
                gws,                         // GroupWriteSupport instance
                CompressionCodecName.SNAPPY, // compression codec
                1024,                        // block (row group) size in bytes
                1024,                        // page size in bytes
                512,                         // dictionary page size in bytes
                true,                        // enable dictionary encoding
                false,                       // disable schema validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);                       // Hadoop configuration
    } catch (IOException ioe) {
        LOG.error(ioe.toString());
    }
    return this;
}
 
Author: grokcoder, Project: pbase, Lines: 19, Source: PFileWriter.java
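For context, here is a brief usage sketch for the writer produced by build(). The schema string and row values are illustrative assumptions; SimpleGroupFactory comes from the parquet-column example classes:

import java.io.IOException;

import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public static void writeRows(ParquetWriter<Group> writer) throws IOException {
    // Use the schema the writer was configured with; this one is illustrative.
    MessageType schema = MessageTypeParser.parseMessageType(
            "message example { required int32 id; required binary name (UTF8); }");
    SimpleGroupFactory factory = new SimpleGroupFactory(schema);

    writer.write(factory.newGroup().append("id", 1).append("name", "alice"));
    writer.write(factory.newGroup().append("id", 2).append("name", "bob"));

    writer.close(); // writes the Parquet footer; the file is unreadable until closed
}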

Example 2: initWriter

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static ParquetWriter<Group> initWriter(String fileName, Map<String, String> metas)
        throws IOException {

    GroupWriteSupport.setSchema(schema, conf);

    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            initFile(fileName),           // destination file path
            new GroupWriteSupport(metas), // write support carrying extra key/value metadata
            CompressionCodecName.SNAPPY,  // compression codec
            1024,                         // block (row group) size in bytes
            1024,                         // page size in bytes
            512,                          // dictionary page size in bytes
            true,                         // enable dictionary encoding
            false,                        // disable schema validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);

    return writer;
}
 
Author: grokcoder, Project: pbase, Lines: 22, Source: GenerateParquetFile.java

Example 3: compressionCodecs

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
@Test
public void compressionCodecs() throws Exception {
  final int[] sizes = { 4 * 1024, 1 * 1024 * 1024 };
  final boolean[] comp = { true, false };

  try (final DeferredException ex = new DeferredException()) {
    for (final int size : sizes) {
      for (final boolean useOnHeapComp : comp) {
        for (final Decompression decomp : Decompression.values()) {
          for (final CompressionCodecName codec : CompressionCodecName.values()) {
            if (codec == CompressionCodecName.LZO) {
              // LZO is GPL-licensed, so its codec is not bundled; skip it.
              continue;
            }
            try {
              test(size, codec, useOnHeapComp, decomp);
            } catch (Exception e) {
              ex.addException(e);
            }
          }
        }
      }
    }
  }
}
 
Author: skhalifa, Project: QDrill, Lines: 26, Source: TestDirectCodecFactory.java

Example 4: run

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * Write the file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.IOFileOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.OUTPUT));

  AvroParquetWriter<Stock> writer =
      new AvroParquetWriter<Stock>(outputPath, Stock.SCHEMA$,
          CompressionCodecName.SNAPPY,
          ParquetWriter.DEFAULT_BLOCK_SIZE,
          ParquetWriter.DEFAULT_PAGE_SIZE,
          true);

  for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
    writer.write(stock);
  }

  writer.close();

  return 0;
}
 
Author: Hanmourang, Project: hiped2, Lines: 35, Source: ParquetAvroStockWriter.java
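To read the file back, parquet-avro's matching reader can be used. This sketch assumes the same generated Stock class and the single-argument AvroParquetReader constructor from parquet-mr 1.x:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import parquet.avro.AvroParquetReader;

public static void dumpStocks(Path parquetPath) throws IOException {
    AvroParquetReader<Stock> reader = new AvroParquetReader<Stock>(parquetPath);
    try {
        Stock stock;
        while ((stock = reader.read()) != null) { // read() returns null at end of file
            System.out.println(stock);
        }
    } finally {
        reader.close();
    }
}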

Example 5: startColumn

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * start a column inside a block
 *
 * @param descriptor           the column descriptor
 * @param valueCount           the value count in this column
 * @param compressionCodecName the codec used to compress this column's pages
 * @throws IOException if the column chunk cannot be started
 */
public void startColumn(ColumnDescriptor descriptor,
                        long valueCount,
                        CompressionCodecName compressionCodecName) throws IOException {
    state = state.startColumn();
    if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
    currentEncodings = new HashSet<parquet.column.Encoding>();
    currentChunkPath = ColumnPath.get(descriptor.getPath());
    currentChunkType = descriptor.getType();
    currentChunkCodec = compressionCodecName;
    currentChunkValueCount = valueCount;
    currentChunkFirstDataPage = out.getPos();
    compressedLength = 0;
    uncompressedLength = 0;
    // need to know what type of stats to initialize to
    // better way to do this?
    currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
}
 
Author: grokcoder, Project: pbase, Lines: 27, Source: ParquetFileWriter.java

Example 6: getCodec

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * @param codecName the requested codec
 * @return the corresponding hadoop codec. null if UNCOMPRESSED
 */
private CompressionCodec getCodec(CompressionCodecName codecName) {
    String codecClassName = codecName.getHadoopCompressionCodecClassName();
    if (codecClassName == null) {
        return null;
    }
    CompressionCodec codec = codecByName.get(codecClassName);
    if (codec != null) {
        return codec;
    }

    try {
        Class<?> codecClass = Class.forName(codecClassName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, configuration);
        codecByName.put(codecClassName, codec);
        return codec;
    } catch (ClassNotFoundException e) {
        throw new BadConfigurationException("Class " + codecClassName + " was not found", e);
    }
}
 
Author: grokcoder, Project: pbase, Lines: 24, Source: CodecFactory.java
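Note the role of the codecByName cache: loading a codec class and instantiating it via ReflectionUtils is comparatively expensive, and getCodec can be called once per column chunk, so caching one instance per codec class name keeps reflection off the hot write path.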

Example 7: ParquetWriter

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * Create a new ParquetWriter.
 *
 * @param file                 the file to create
 * @param writeSupport         the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize            the block size threshold
 * @param pageSize             the page size threshold
 * @param dictionaryPageSize   the page size threshold for the dictionary pages
 * @param enableDictionary     to turn dictionary encoding on
 * @param validating           to turn on validation using the schema
 * @param writerVersion        version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf                 Hadoop configuration to use while accessing the filesystem
 * @throws IOException
 */
public ParquetWriter(
        Path file,
        WriteSupport<T> writeSupport,
        CompressionCodecName compressionCodecName,
        int blockSize,
        int pageSize,
        int dictionaryPageSize,
        boolean enableDictionary,
        boolean validating,
        WriterVersion writerVersion,
        Configuration conf) throws IOException {
    this(file, ParquetFileWriter.Mode.CREATE, writeSupport,
            compressionCodecName, blockSize, pageSize, dictionaryPageSize,
            enableDictionary, validating, writerVersion, conf);
}
 
Author: grokcoder, Project: pbase, Lines: 31, Source: ParquetWriter.java

Example 8: testReadWrite

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private void testReadWrite(CompressionCodecName codec, Map<String, String> conf) throws IOException, ClassNotFoundException, InterruptedException {
    runMapReduceJob(codec, conf);
    final BufferedReader in = new BufferedReader(new FileReader(new File(inputPath.toString())));
    final BufferedReader out = new BufferedReader(new FileReader(new File(outputPath.toString(), "part-m-00000")));
    String lineIn;
    String lineOut = null;
    int lineNumber = 0;
    while ((lineIn = in.readLine()) != null && (lineOut = out.readLine()) != null) {
        ++lineNumber;
        lineOut = lineOut.substring(lineOut.indexOf("\t") + 1);
        assertEquals("line " + lineNumber, lineIn, lineOut);
    }
    assertNull("line " + lineNumber, out.readLine());
    assertNull("line " + lineNumber, lineIn);
    in.close();
    out.close();
}
 
Author: grokcoder, Project: pbase, Lines: 18, Source: TestInputOutputFormat.java

Example 9: runMapReduceJob

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec)
        throws IOException, ClassNotFoundException, InterruptedException {

    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        jobConf.setInputFormat(TextInputFormat.class);
        TextInputFormat.addInputPath(jobConf, inputPath);
        jobConf.setNumReduceTasks(0);

        jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
        DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
        DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
        DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
        GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);

        jobConf.setMapperClass(DeprecatedMapper.class);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
 
Author: grokcoder, Project: pbase, Lines: 21, Source: DeprecatedOutputFormatTest.java

Example 10: readDictionaryPage

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private static DictionaryPage readDictionaryPage(byte[] data, ParquetCodecFactory codecFactory, CompressionCodecName codecName)
{
    try {
        ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
        PageHeader pageHeader = Util.readPageHeader(inputStream);

        if (pageHeader.type != PageType.DICTIONARY_PAGE) {
            return null;
        }

        // todo this wrapper is not needed
        BytesInput compressedData = BytesInput.from(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size());

        BytesDecompressor decompressor = codecFactory.getDecompressor(codecName);
        BytesInput decompressed = decompressor.decompress(compressedData, pageHeader.getUncompressed_page_size());

        DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
        Encoding encoding = Encoding.valueOf(dicHeader.getEncoding().name());
        int dictionarySize = dicHeader.getNum_values();

        return new DictionaryPage(decompressed, dictionarySize, encoding);
    }
    catch (IOException ignored) {
        return null;
    }
}
 
Author: y-lan, Project: presto, Lines: 27, Source: ParquetPredicateUtils.java
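Returning null on IOException appears deliberate here: if the dictionary page cannot be read, the caller simply falls back to evaluating the predicate without dictionary information instead of failing the whole read.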

Example 11: getCodec

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private CompressionCodec getCodec(CompressionCodecName codecName)
{
    String codecClassName = codecName.getHadoopCompressionCodecClassName();
    if (codecClassName == null) {
        return null;
    }
    CompressionCodec codec = codecByName.get(codecClassName);
    if (codec != null) {
        return codec;
    }

    try {
        Class<?> codecClass = Class.forName(codecClassName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, configuration);
        codecByName.put(codecClassName, codec);
        return codec;
    }
    catch (ClassNotFoundException e) {
        throw new RuntimeException("Class " + codecClassName + " was not found", e);
    }
}
 
Author: y-lan, Project: presto, Lines: 22, Source: ParquetCodecFactory.java

Example 12: writeAvro

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Get a Hadoop Job instance to carry the output configuration
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Author: FelixNeutatz, Project: parquet-flinktacular, Lines: 17, Source: ParquetAvroExample.java
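A hedged usage sketch for writeAvro follows. Person is the Avro-generated class from the project; its builder fields, the output path, and the job name below are illustrative assumptions:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// Wrap each record in a Tuple2 with a Void key, as the output format expects;
	// the Void field may stay null because Flink serializes Void with a dedicated serializer.
	DataSet<Tuple2<Void, Person>> data = env
			.fromElements(Person.newBuilder().setName("alice").build())
			.map(new MapFunction<Person, Tuple2<Void, Person>>() {
				@Override
				public Tuple2<Void, Person> map(Person p) {
					return new Tuple2<Void, Person>(null, p);
				}
			});

	writeAvro(data, "hdfs:///tmp/persons.parquet");
	env.execute("write avro to parquet");
}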

Example 13: writeThrift

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Get a Hadoop Job instance to carry the output configuration
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	ParquetThriftOutputFormat.setThriftClass(job, Person.class);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
Author: FelixNeutatz, Project: parquet-flinktacular, Lines: 18, Source: ParquetThriftExample.java

Example 14: getRecordWriter

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<K, T> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final Configuration conf = getConfiguration(taskAttemptContext);

    CompressionCodecName codec = getCodec(taskAttemptContext);
    String extension = codec.getExtension() + ".parquet";
    TaskID taskId = taskAttemptContext.getTaskAttemptID().getTaskID();

    Path workPath = ((ParquetMultiOutputCommitter)
            getOutputCommitter(taskAttemptContext)).getWorkPath();

    return getRecordWriter(
            conf, workPath, extension, String.format("%05d", taskId.getId()), codec);
}
 
Author: saikocat, Project: parquet-mr-contrib, Lines: 19, Source: ParquetMultiOutputFormat.java

Example 15: TajoParquetWriter

import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * Create a new TajoParquetWriter.
 *
 * @param file The file name to write to.
 * @param schema The Tajo schema of the table.
 * @param compressionCodecName Compression codec to use, or
 *                             CompressionCodecName.UNCOMPRESSED.
 * @param blockSize The block size threshold.
 * @param pageSize The page size threshold. Blocks are subdivided into
 *                 pages for alignment.
 * @param enableDictionary Whether to use a dictionary to compress columns.
 * @param validating Whether to turn on validation.
 * @throws IOException
 */
public TajoParquetWriter(Path file,
                         Schema schema,
                         CompressionCodecName compressionCodecName,
                         int blockSize,
                         int pageSize,
                         boolean enableDictionary,
                         boolean validating) throws IOException {
  super(file,
        new TajoWriteSupport(schema),
        compressionCodecName,
        blockSize,
        pageSize,
        enableDictionary,
        validating);
}
 
Author: gruter, Project: tajo-cdh, Lines: 30, Source: TajoParquetWriter.java


Note: The parquet.hadoop.metadata.CompressionCodecName class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their developers; copyright in the source code belongs to the original authors. Consult the corresponding project's license before redistributing or using the code; do not reproduce without permission.