This article collects typical usage examples of the Java enum constant parquet.hadoop.metadata.CompressionCodecName.SNAPPY. If you are wondering what CompressionCodecName.SNAPPY does, how to use it, or where to find working examples, the curated snippets below should help. You can also explore the other members of its enclosing class, parquet.hadoop.metadata.CompressionCodecName.
The following 10 code examples of CompressionCodecName.SNAPPY are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java examples.
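Before the examples, here is a minimal self-contained sketch of the pattern they all share: passing CompressionCodecName.SNAPPY into a ParquetWriter. The schema, output path, and sample row are placeholders of our own; the ten-argument constructor mirrors the one used in Examples 1 and 2 below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.column.ParquetProperties;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class SnappyWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // placeholder schema for illustration
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required binary name (UTF8); }");
        GroupWriteSupport.setSchema(schema, conf);

        ParquetWriter<Group> writer = new ParquetWriter<Group>(
                new Path("/tmp/example.parquet"),   // placeholder output path
                new GroupWriteSupport(),
                CompressionCodecName.SNAPPY,        // Snappy-compress the column chunks
                ParquetWriter.DEFAULT_BLOCK_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,    // dictionary page size
                true,                               // enable dictionary encoding
                false,                              // skip schema validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);

        writer.write(new SimpleGroupFactory(schema).newGroup().append("name", "snappy"));
        writer.close();
    }
}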
Example 1: build
public PFileWriter build() {
    try {
        this.parquetWriter = new ParquetWriter<Group>(
                file,
                gws,
                CompressionCodecName.SNAPPY,  // compression codec
                1024,                         // block size
                1024,                         // page size
                512,                          // dictionary page size
                true,                         // enable dictionary encoding
                false,                        // disable validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);
    } catch (IOException ioe) {
        LOG.error(ioe.toString());
    }
    return this;
}
Example 2: initWriter
public static ParquetWriter<Group> initWriter(String fileName, Map<String, String> metas)
        throws IOException {
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            initFile(fileName),
            new GroupWriteSupport(metas),
            CompressionCodecName.SNAPPY,  // compression codec
            1024,                         // block size
            1024,                         // page size
            512,                          // dictionary page size
            true,                         // enable dictionary encoding
            false,                        // disable validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
    return writer;
}
Example 3: run
/**
 * Write the file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.IOFileOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.OUTPUT));

    AvroParquetWriter<Stock> writer =
            new AvroParquetWriter<Stock>(outputPath, Stock.SCHEMA$,
                    CompressionCodecName.SNAPPY,
                    ParquetWriter.DEFAULT_BLOCK_SIZE,
                    ParquetWriter.DEFAULT_PAGE_SIZE,
                    true);  // enable dictionary encoding

    for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
        writer.write(stock);
    }

    writer.close();
    return 0;
}
Example 4: createCompressor
@Override
protected BytesCompressor createCompressor(final CompressionCodecName codecName, final CompressionCodec codec,
        int pageSize) {
    if (codec == null) {
        return new NoopCompressor();
    } else if (codecName == CompressionCodecName.SNAPPY) {
        // avoid using the Parquet Snappy codec since it allocates direct buffers at awkward spots.
        return new SnappyCompressor();
    } else {
        // TODO: move zlib above since it also generates allocateDirect calls.
        return new HeapBytesCompressor(codecName, codec, pageSize);
    }
}
Example 5: init
@Override
public void init(Map<String, String> writerOptions) throws IOException {
    this.location = writerOptions.get("location");
    this.prefix = writerOptions.get("prefix");

    conf = new Configuration();
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, writerOptions.get(FileSystem.FS_DEFAULT_NAME_KEY));

    blockSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_BLOCK_SIZE));
    pageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_PAGE_SIZE));
    dictionaryPageSize = Integer.parseInt(writerOptions.get(ExecConstants.PARQUET_DICT_PAGE_SIZE));

    String codecName = writerOptions.get(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE).toLowerCase();
    switch (codecName) {
        case "snappy":
            codec = CompressionCodecName.SNAPPY;
            break;
        case "lzo":
            codec = CompressionCodecName.LZO;
            break;
        case "gzip":
            codec = CompressionCodecName.GZIP;
            break;
        case "none":
        case "uncompressed":
            codec = CompressionCodecName.UNCOMPRESSED;
            break;
        default:
            throw new UnsupportedOperationException(String.format("Unknown compression type: %s", codecName));
    }

    enableDictionary = Boolean.parseBoolean(writerOptions.get(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING));
}
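For context, here is a hypothetical options map that would exercise the init() above with Snappy compression. The key constants come from Drill's ExecConstants exactly as the example uses them; the concrete values and the writer instance name are illustrative assumptions, not defaults taken from the source.

Map<String, String> writerOptions = new HashMap<String, String>();
writerOptions.put("location", "/tmp/drill-out");                    // assumed output directory
writerOptions.put("prefix", "part");                                // assumed file prefix
writerOptions.put(FileSystem.FS_DEFAULT_NAME_KEY, "file:///");      // local filesystem for the sketch
writerOptions.put(ExecConstants.PARQUET_BLOCK_SIZE, "134217728");   // 128 MB row groups (illustrative)
writerOptions.put(ExecConstants.PARQUET_PAGE_SIZE, "1048576");      // 1 MB pages (illustrative)
writerOptions.put(ExecConstants.PARQUET_DICT_PAGE_SIZE, "1048576"); // 1 MB dictionary pages (illustrative)
writerOptions.put(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE, "snappy");
writerOptions.put(ExecConstants.PARQUET_WRITER_ENABLE_DICTIONARY_ENCODING, "true");
recordWriter.init(writerOptions);  // recordWriter: hypothetical instance of the class above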
Example 6: HMAvroParquetScheme
public HMAvroParquetScheme(Schema schema, String compression) {
    this(schema);
    if (compression.equalsIgnoreCase("snappy")) {
        codec = CompressionCodecName.SNAPPY;
    } else if (compression.equalsIgnoreCase("gzip")) {
        codec = CompressionCodecName.GZIP;
    }
}
Example 7: open
@Override
public void open() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
            "Unable to open a writer from state:%s", state);

    logger.debug(
            "Opening data file with pathTmp:{} (final path will be path:{})",
            pathTmp, path);

    try {
        CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
        if (enableCompression) {
            if (SnappyCodec.isNativeCodeLoaded()) {
                codecName = CompressionCodecName.SNAPPY;
            } else {
                logger.warn("Compression enabled, but Snappy native code not loaded. " +
                        "Parquet file will not be compressed.");
            }
        }
        avroParquetWriter = new AvroParquetWriter<E>(fileSystem.makeQualified(pathTmp),
                schema, codecName, DEFAULT_BLOCK_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE);
    } catch (IOException e) {
        throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
    }

    state = ReaderWriterState.OPEN;
}
Example 8: getCodecName
@Override
public CompressionCodecName getCodecName() {
    return CompressionCodecName.SNAPPY;
}
Example 9: main
public static void main(String[] args) {
    String inputFile = null;
    String outputFile = null;

    HelpFormatter formatter = new HelpFormatter();
    // create Options object
    Options options = new Options();
    // add the input and output file options
    options.addOption("i", true, "input avro file");
    options.addOption("o", true, "output Parquet file");

    CommandLineParser parser = new DefaultParser();
    CommandLine cmd;
    try {
        cmd = parser.parse(options, args);
        inputFile = cmd.getOptionValue("i");
        if (inputFile == null) {
            formatter.printHelp("AvroToParquet", options);
            return;
        }
        outputFile = cmd.getOptionValue("o");
    } catch (ParseException exc) {
        System.err.println("Problem with command line parameters: " + exc.getMessage());
        return;
    }

    File avroFile = new File(inputFile);
    if (!avroFile.exists()) {
        System.err.println("Could not open file: " + inputFile);
        return;
    }

    try {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> dataFileReader;
        dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);

        Schema avroSchema = dataFileReader.getSchema();

        // choose compression scheme
        CompressionCodecName compressionCodecName = CompressionCodecName.SNAPPY;

        // set Parquet file block size and page size values
        int blockSize = 256 * 1024 * 1024;  // 256 MB row groups
        int pageSize = 64 * 1024;           // 64 KB pages

        String base = FilenameUtils.removeExtension(avroFile.getAbsolutePath()) + ".parquet";
        if (outputFile != null) {
            File file = new File(outputFile);
            base = file.getAbsolutePath();
        }
        Path outputPath = new Path("file:///" + base);

        // the ParquetWriter object that will consume Avro GenericRecords
        ParquetWriter<GenericRecord> parquetWriter;
        parquetWriter = new AvroParquetWriter<GenericRecord>(outputPath, avroSchema, compressionCodecName, blockSize, pageSize);
        for (GenericRecord record : dataFileReader) {
            parquetWriter.write(record);
        }

        dataFileReader.close();
        parquetWriter.close();
    } catch (IOException e) {
        System.err.println("Caught exception: " + e.getMessage());
    }
}
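As a quick sanity check on the conversion above, the written file can be streamed back as Avro records with AvroParquetReader. This is a hedged sketch, not part of the original tool; it assumes outputPath still points at the file just written.

// Hypothetical verification step: read the Parquet file back as GenericRecords.
AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(outputPath);
GenericRecord record;
while ((record = reader.read()) != null) {
    System.out.println(record);
}
reader.close();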
Example 10: run
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        LOG.error("Usage: " + getClass().getName() + " INPUTFILE OUTPUTFILE [compression]");
        return 1;
    }

    String inputFile = args[0];
    String outputFile = args[1];
    String compression = (args.length > 2) ? args[2] : "none";

    Path parquetFilePath = null;
    // Find a file in case a directory was passed
    RemoteIterator<LocatedFileStatus> it = FileSystem.get(getConf()).listFiles(new Path(inputFile), true);
    while (it.hasNext()) {
        FileStatus fs = it.next();
        if (fs.isFile()) {
            parquetFilePath = fs.getPath();
            break;
        }
    }
    if (parquetFilePath == null) {
        LOG.error("No file found for " + inputFile);
        return 1;
    }

    LOG.info("Getting schema from " + parquetFilePath);
    ParquetMetadata readFooter = ParquetFileReader.readFooter(getConf(), parquetFilePath);
    MessageType schema = readFooter.getFileMetaData().getSchema();
    LOG.info(schema);
    GroupWriteSupport.setSchema(schema, getConf());

    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(ExampleInputFormat.class);
    job.setOutputFormatClass(ExampleOutputFormat.class);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    if (compression.equalsIgnoreCase("snappy")) {
        codec = CompressionCodecName.SNAPPY;
    } else if (compression.equalsIgnoreCase("gzip")) {
        codec = CompressionCodecName.GZIP;
    }
    LOG.info("Output compression: " + codec);
    ExampleOutputFormat.setCompression(job, codec);

    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));

    job.waitForCompletion(true);
    return 0;
}
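Since this run() follows Hadoop's Tool contract (note the getConf() calls), a small driver is needed to launch it. A minimal sketch, assuming the method lives in a Tool implementation; the class name ParquetRecompress is our own placeholder, not the original's.

// Hypothetical driver for the run() method above.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new ParquetRecompress(), args);
    System.exit(exitCode);
}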