当前位置: 首页>>代码示例>>Java>>正文


Java CompressionCodecName.SNAPPY属性代码示例

本文整理汇总了Java中parquet.hadoop.metadata.CompressionCodecName.SNAPPY属性的典型用法代码示例。如果您正苦于以下问题:Java CompressionCodecName.SNAPPY属性的具体用法?Java CompressionCodecName.SNAPPY怎么用?Java CompressionCodecName.SNAPPY使用的例子?那么, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在parquet.hadoop.metadata.CompressionCodecName的用法示例。


在下文中一共展示了CompressionCodecName.SNAPPY属性的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: build

/**
 * Builds the underlying Snappy-compressed Parquet writer for this PFileWriter.
 *
 * <p>Uses fixed sizes (block 1024, page 1024, dictionary page 512) with
 * dictionary encoding enabled and validation disabled, targeting the
 * PARQUET_1_0 format. On failure the error is logged and {@code parquetWriter}
 * is left unset; callers get {@code this} either way (fluent style).
 *
 * @return this builder, for chaining
 */
public PFileWriter build(){
    try {
        this.parquetWriter = new ParquetWriter<Group>(
                file,
                gws,
                CompressionCodecName.SNAPPY,
                1024,   // block size
                1024,   // page size
                512,    // dictionary page size
                true,   // enable dictionary encoding
                false,  // disable validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);
    }catch (IOException ioe){
        // Pass the throwable itself so the full stack trace is logged,
        // not just the toString() summary.
        LOG.error("Failed to create ParquetWriter", ioe);
    }
    return this;
}
 
开发者ID:grokcoder,项目名称:pbase,代码行数:18,代码来源:PFileWriter.java

示例2: initWriter

/**
 * Creates a Snappy-compressed Parquet writer for the given file name.
 *
 * <p>Registers the (class-level) schema on the shared configuration first,
 * then constructs a writer with fixed sizes: block 1024, page 1024,
 * dictionary page 512, dictionary encoding on, validation off, PARQUET_1_0.
 *
 * @param fileName target file name, resolved via {@code initFile}
 * @param metas    extra key/value metadata attached to the file footer
 * @return a ready-to-use writer of {@link Group} records
 * @throws IOException if the underlying file cannot be created
 */
public static ParquetWriter<Group> initWriter(String fileName, Map<String, String> metas)
        throws IOException{

    GroupWriteSupport.setSchema(schema, conf);

    return new ParquetWriter<Group>(
            initFile(fileName),
            new GroupWriteSupport(metas),
            CompressionCodecName.SNAPPY,
            1024,   // block size
            1024,   // page size
            512,    // dictionary page size
            true,   // enable dictionary encoding
            false,  // disable validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
}
 
开发者ID:grokcoder,项目名称:pbase,代码行数:21,代码来源:GenerateParquetFile.java

示例3: run

/**
 * Write the file.
 *
 * <p>Reads stock records from the CSV input file and writes them to a
 * Snappy-compressed Avro-Parquet file at the output path.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.IOFileOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.OUTPUT));

  AvroParquetWriter<Stock> writer =
      new AvroParquetWriter<Stock>(outputPath, Stock.SCHEMA$,
          CompressionCodecName.SNAPPY,
          ParquetWriter.DEFAULT_BLOCK_SIZE,
          ParquetWriter.DEFAULT_PAGE_SIZE,
          true);

  try {
    for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
      writer.write(stock);
    }
  } finally {
    // Always close so the Parquet footer is written and the file handle
    // released, even if a write fails partway through.
    writer.close();
  }

  return 0;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:34,代码来源:ParquetAvroStockWriter.java

示例4: createCompressor

/**
 * Chooses a compressor implementation for the given codec.
 *
 * <p>A null codec yields a pass-through no-op compressor. Snappy is handled
 * by a custom implementation rather than Parquet's built-in one.
 */
@Override
protected BytesCompressor createCompressor(final CompressionCodecName codecName, final CompressionCodec codec,
    int pageSize) {

  // No codec configured: hand bytes through untouched.
  if (codec == null) {
    return new NoopCompressor();
  }

  // Use our own Snappy compressor; Parquet's allocates direct buffers
  // at awkward spots.
  if (codecName == CompressionCodecName.SNAPPY) {
    return new SnappyCompressor();
  }

  // todo: move zlib above since it also generates allocateDirect calls.
  return new HeapBytesCompressor(codecName, codec, pageSize);
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:15,代码来源:DirectCodecFactory.java

示例5: init

/**
 * Initializes this writer from the supplied option map.
 *
 * <p>Reads the target location/prefix, the default filesystem, the Parquet
 * block/page/dictionary-page sizes, the compression codec name, and the
 * dictionary-encoding flag.
 *
 * @param writerOptions option map; see ExecConstants for the expected keys
 * @throws IOException declared for interface compatibility
 * @throws UnsupportedOperationException if the configured compression type
 *         is not one of snappy/lzo/gzip/none/uncompressed
 */
@Override
public void init(Map<String, String> writerOptions) throws IOException {
  this.location = writerOptions.get("location");
  this.prefix = writerOptions.get("prefix");

  conf = new Configuration();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));
  blockSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_BLOCK_SIZE));
  pageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_PAGE_SIZE));
  dictionaryPageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_DICT_PAGE_SIZE));

  String codecName = writerOptions.get(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).toLowerCase();
  if ("snappy".equals(codecName)) {
    codec = CompressionCodecName.SNAPPY;
  } else if ("lzo".equals(codecName)) {
    codec = CompressionCodecName.LZO;
  } else if ("gzip".equals(codecName)) {
    codec = CompressionCodecName.GZIP;
  } else if ("none".equals(codecName) || "uncompressed".equals(codecName)) {
    codec = CompressionCodecName.UNCOMPRESSED;
  } else {
    throw new UnsupportedOperationException(String.format("Unknown compression type: %s", codecName));
  }

  enableDictionary = Boolean.parseBoolean(writerOptions.get(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:31,代码来源:ParquetRecordWriter.java

示例6: HMAvroParquetScheme

/**
 * Creates a scheme for the given Avro schema with an optional compression
 * override.
 *
 * <p>Recognized values (case-insensitive): "snappy", "gzip". Any other
 * value — including null — leaves the codec at its default.
 *
 * @param schema      the Avro schema for records written by this scheme
 * @param compression codec name, or null to keep the default
 */
public HMAvroParquetScheme(Schema schema, String compression) {
  this(schema);
  // Constant-first comparison avoids an NPE when compression is null;
  // unrecognized values silently keep the default codec, as before.
  if ("snappy".equalsIgnoreCase(compression)) {
      codec = CompressionCodecName.SNAPPY;
  } else if ("gzip".equalsIgnoreCase(compression)) {
      codec = CompressionCodecName.GZIP;
  }
}
 
开发者ID:Datasio,项目名称:cascalog-avro-parquet,代码行数:8,代码来源:HMAvroParquetScheme.java

示例7: open

/**
 * Opens the writer, creating the temporary Parquet output file.
 *
 * <p>Valid only from state NEW; transitions to OPEN on success. Snappy
 * compression is used only when requested AND the native library is
 * actually loaded; otherwise the file is written uncompressed with a
 * warning.
 */
@Override
public void open() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
    "Unable to open a writer from state:%s", state);

  logger.debug(
    "Opening data file with pathTmp:{} (final path will be path:{})",
    pathTmp, path);

  // Default to no compression; upgrade to Snappy only when both enabled
  // and natively available.
  CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
  if (enableCompression && SnappyCodec.isNativeCodeLoaded()) {
    codecName = CompressionCodecName.SNAPPY;
  } else if (enableCompression) {
    logger.warn("Compression enabled, but Snappy native code not loaded. " +
        "Parquet file will not be compressed.");
  }

  try {
    avroParquetWriter = new AvroParquetWriter<E>(fileSystem.makeQualified(pathTmp),
        schema, codecName, DEFAULT_BLOCK_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE);
  } catch (IOException e) {
    throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
  }

  state = ReaderWriterState.OPEN;
}
 
开发者ID:cloudera,项目名称:cdk,代码行数:28,代码来源:ParquetFileSystemDatasetWriter.java

示例8: getCodecName

/** Always reports Snappy as the codec for this factory, regardless of configuration. */
@Override
public CompressionCodecName getCodecName() {
  return CompressionCodecName.SNAPPY;
}
 
开发者ID:skhalifa,项目名称:QDrill,代码行数:4,代码来源:DirectCodecFactory.java

示例9: main

/**
 * Converts an Avro data file to a Snappy-compressed Parquet file.
 *
 * <p>Usage: {@code -i <input avro file> [-o <output parquet file>]}.
 * When no output is given, the input path with a {@code .parquet}
 * extension is used. Errors are reported on stderr; the method returns
 * without throwing.
 *
 * @param args command-line arguments (see options below)
 */
public static void main(String[] args) {

	String inputFile = null;
	String outputFile = null;

	HelpFormatter formatter = new HelpFormatter();
	// create Options object
	Options options = new Options();

	// add t option
	options.addOption("i", true, "input avro file");
	options.addOption("o", true, "output Parquet file");
	CommandLineParser parser = new DefaultParser();
	CommandLine cmd;
	try {
		cmd = parser.parse(options, args);
		inputFile = cmd.getOptionValue("i");
		if (inputFile == null) {
			formatter.printHelp("AvroToParquet", options);
			return;
		}
		outputFile = cmd.getOptionValue("o");
	} catch (ParseException exc) {
		System.err.println("Problem with command line parameters: " + exc.getMessage());
		return;
	}

	File avroFile = new File(inputFile);

	if (!avroFile.exists()) {
		System.err.println("Could not open file: " + inputFile);
		return;
	}

	// try-with-resources closes both the Avro reader and the Parquet writer
	// even when a read/write fails partway through (the original leaked both
	// on exception, which can leave a truncated footer-less Parquet file).
	try (DataFileReader<GenericRecord> dataFileReader =
			new DataFileReader<GenericRecord>(avroFile, new GenericDatumReader<GenericRecord>())) {

		Schema avroSchema = dataFileReader.getSchema();

		// choose compression scheme
		CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;

		// set Parquet file block size and page size values
		int blockSize = 256 * 1024 * 1024;
		int pageSize = 64 * 1024;

		String base = FilenameUtils.removeExtension(avroFile.getAbsolutePath()) + ".parquet";
		if (outputFile != null) {
			File file = new File(outputFile);
			base = file.getAbsolutePath();
		}

		Path outputPath = new Path("file:///" + base);

		// the ParquetWriter object that will consume Avro GenericRecords
		try (ParquetWriter<GenericRecord> parquetWriter = new AvroParquetWriter<GenericRecord>(
				outputPath, avroSchema, compressionCodecName, blockSize, pageSize)) {
			for (GenericRecord record : dataFileReader) {
				parquetWriter.write(record);
			}
		}
	} catch (IOException e) {
		System.err.println("Caught exception: " + e.getMessage());
	}
}
 
开发者ID:CohesionForce,项目名称:avroToParquet,代码行数:67,代码来源:AvroToParquet.java

示例10: run

/**
 * Copies a Parquet file (or the first file found under a directory) through
 * a map-only MR job, optionally re-compressing the output.
 *
 * <p>Arguments: INPUTFILE OUTPUTFILE [compression], where compression may
 * be "snappy", "gzip", or anything else for uncompressed.
 *
 * @param args command-line arguments as above
 * @return 0 on success, 1 on bad usage or missing input
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        LOG.error("Usage: " + getClass().getName() + " INPUTFILE OUTPUTFILE [compression]");
        return 1;
    }
    String inputFile = args[0];
    String outputFile = args[1];
    String compression = (args.length > 2) ? args[2] : "none";

    // A directory may have been passed; locate the first regular file in it
    // so its footer can supply the schema.
    Path parquetFilePath = null;
    RemoteIterator<LocatedFileStatus> files =
            FileSystem.get(getConf()).listFiles(new Path(inputFile), true);
    while (files.hasNext()) {
        FileStatus status = files.next();
        if (status.isFile()) {
            parquetFilePath = status.getPath();
            break;
        }
    }
    if (parquetFilePath == null) {
        LOG.error("No file found for " + inputFile);
        return 1;
    }

    LOG.info("Getting schema from " + parquetFilePath);
    ParquetMetadata readFooter = ParquetFileReader.readFooter(getConf(), parquetFilePath);
    MessageType schema = readFooter.getFileMetaData().getSchema();
    LOG.info(schema);
    GroupWriteSupport.setSchema(schema, getConf());

    // Map-only identity job: Parquet in, Parquet out.
    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(ExampleInputFormat.class);
    job.setOutputFormatClass(ExampleOutputFormat.class);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    if (compression.equalsIgnoreCase("snappy")) {
        codec = CompressionCodecName.SNAPPY;
    } else if (compression.equalsIgnoreCase("gzip")) {
        codec = CompressionCodecName.GZIP;
    }
    LOG.info("Output compression: " + codec);
    ExampleOutputFormat.setCompression(job, codec);

    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));

    job.waitForCompletion(true);

    return 0;
}
 
开发者ID:cloudera,项目名称:parquet-examples,代码行数:53,代码来源:TestReadWriteParquet.java


注:本文中的parquet.hadoop.metadata.CompressionCodecName.SNAPPY属性示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。