This article collects typical usage examples of the Java class parquet.hadoop.metadata.CompressionCodecName. If you are unsure what CompressionCodecName does or how to use it, the curated class examples below should help.
The CompressionCodecName class belongs to the parquet.hadoop.metadata package. Fifteen code examples of the class are shown below, sorted by popularity by default.
Example 1: build
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public PFileWriter build() {
    try {
        this.parquetWriter = new ParquetWriter<Group>(
                file,
                gws,                          // write support used to convert Group records
                CompressionCodecName.SNAPPY,  // compression codec
                1024,                         // block size
                1024,                         // page size
                512,                          // dictionary page size
                true,                         // enable dictionary encoding
                false,                        // disable schema validation
                ParquetProperties.WriterVersion.PARQUET_1_0,
                conf);
    } catch (IOException ioe) {
        LOG.error(ioe.toString());
    }
    return this;
}
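Once a ParquetWriter<Group> like the one above has been built, records are written as Group instances and the writer must be closed to produce a valid footer. A minimal usage sketch, assuming the example Group classes from parquet-mr; the schema string, field names, and the writeOneRow helper are illustrative only, not part of the original project:

import java.io.IOException;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.schema.MessageTypeParser;

// Hypothetical helper: writes a single row through a writer built as above.
static void writeOneRow(ParquetWriter<Group> writer) throws IOException {
    SimpleGroupFactory factory = new SimpleGroupFactory(
            MessageTypeParser.parseMessageType(
                    "message example { required int32 id; required binary name (UTF8); }"));
    Group row = factory.newGroup()
            .append("id", 1)
            .append("name", "alice");
    writer.write(row);
    writer.close();   // flushes data and writes the footer; the file is unreadable until closed
}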
Example 2: initWriter
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static ParquetWriter<Group> initWriter(String fileName, Map<String, String> metas)
        throws IOException {
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
            initFile(fileName),
            new GroupWriteSupport(metas),
            CompressionCodecName.SNAPPY,  // compression codec
            1024,                         // block size
            1024,                         // page size
            512,                          // dictionary page size
            true,                         // enable dictionary encoding
            false,                        // disable schema validation
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
    return writer;
}
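Reading such a file back is symmetric: a ParquetReader<Group> built with a GroupReadSupport returns one Group per record and null at the end of input. A hedged read-back sketch, assuming the file was produced by initWriter above; the dumpFile helper name is an assumption:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import parquet.example.data.Group;
import parquet.hadoop.ParquetReader;
import parquet.hadoop.example.GroupReadSupport;

// Hypothetical helper: prints every record of a Parquet file written with GroupWriteSupport.
static void dumpFile(String fileName) throws IOException {
    ParquetReader<Group> reader = new ParquetReader<Group>(new Path(fileName), new GroupReadSupport());
    Group record;
    while ((record = reader.read()) != null) {
        System.out.println(record);
    }
    reader.close();
}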
Example 3: compressionCodecs
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
@Test
public void compressionCodecs() throws Exception {
    final int[] sizes = { 4 * 1024, 1 * 1024 * 1024 };
    final boolean[] comp = { true, false };
    try (final DeferredException ex = new DeferredException()) {
        for (final int size : sizes) {
            for (final boolean useOnHeapComp : comp) {
                for (final Decompression decomp : Decompression.values()) {
                    for (final CompressionCodecName codec : CompressionCodecName.values()) {
                        if (codec == CompressionCodecName.LZO) {
                            // LZO is GPL-licensed and not installed by default, so skip it.
                            continue;
                        }
                        try {
                            test(size, codec, useOnHeapComp, decomp);
                        } catch (Exception e) {
                            ex.addException(e);
                        }
                    }
                }
            }
        }
    }
}
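The test sweeps every value of the CompressionCodecName enum and only special-cases LZO. A small sketch of what that enumeration exposes; the listCodecs helper is illustrative only:

import parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical helper: lists each codec with the Hadoop codec class it maps to.
// UNCOMPRESSED maps to null; LZO requires the GPL LZO codec on the classpath.
static void listCodecs() {
    for (CompressionCodecName codec : CompressionCodecName.values()) {
        System.out.println(codec + " -> " + codec.getHadoopCompressionCodecClassName());
    }
}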
Example 4: run
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
* Write the file.
*
* @param args the command-line arguments
* @return the process exit code
* @throws Exception if something goes wrong
*/
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.IOFileOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }
    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.IOFileOpts.OUTPUT));
    AvroParquetWriter<Stock> writer =
            new AvroParquetWriter<Stock>(outputPath, Stock.SCHEMA$,
                    CompressionCodecName.SNAPPY,
                    ParquetWriter.DEFAULT_BLOCK_SIZE,
                    ParquetWriter.DEFAULT_PAGE_SIZE,
                    true);
    for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
        writer.write(stock);
    }
    writer.close();
    return 0;
}
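The Avro-backed file written here can be read back with AvroParquetReader, which deserializes each record into the same generated Avro class. A hedged read-back sketch; the printStocks helper name is an assumption, while Stock is the generated class used above:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import parquet.avro.AvroParquetReader;

// Hypothetical helper: prints every Stock record from the Parquet file written by run().
static void printStocks(Path outputPath) throws IOException {
    AvroParquetReader<Stock> reader = new AvroParquetReader<Stock>(outputPath);
    Stock stock;
    while ((stock = reader.read()) != null) {
        System.out.println(stock);
    }
    reader.close();
}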
Example 5: startColumn
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * start a column inside a block
 *
 * @param descriptor the column descriptor
 * @param valueCount the value count in this column
 * @param compressionCodecName the codec used to compress this column's pages
 * @throws IOException if the column chunk cannot be started
 */
public void startColumn(ColumnDescriptor descriptor,
                        long valueCount,
                        CompressionCodecName compressionCodecName) throws IOException {
    state = state.startColumn();
    if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
    currentEncodings = new HashSet<parquet.column.Encoding>();
    currentChunkPath = ColumnPath.get(descriptor.getPath());
    currentChunkType = descriptor.getType();
    currentChunkCodec = compressionCodecName;
    currentChunkValueCount = valueCount;
    currentChunkFirstDataPage = out.getPos();
    compressedLength = 0;
    uncompressedLength = 0;
    // need to know what type of stats to initialize to
    // better way to do this?
    currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
}
Example 6: getCodec
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
* @param codecName the requested codec
* @return the corresponding hadoop codec. null if UNCOMPRESSED
*/
private CompressionCodec getCodec(CompressionCodecName codecName) {
    String codecClassName = codecName.getHadoopCompressionCodecClassName();
    if (codecClassName == null) {
        return null;
    }
    CompressionCodec codec = codecByName.get(codecClassName);
    if (codec != null) {
        return codec;
    }
    try {
        Class<?> codecClass = Class.forName(codecClassName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, configuration);
        codecByName.put(codecClassName, codec);
        return codec;
    } catch (ClassNotFoundException e) {
        throw new BadConfigurationException("Class " + codecClassName + " was not found", e);
    }
}
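The class-name lookup that feeds this cache comes from CompressionCodecName itself. A small sketch of the related accessors; the showCodecLookup helper is illustrative only:

import parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical helper illustrating the accessors that feed the codec cache above.
static void showCodecLookup() {
    CompressionCodecName snappy = CompressionCodecName.fromConf("snappy");  // codec name from a config string
    System.out.println(snappy.getHadoopCompressionCodecClassName());        // the class name resolved by getCodec()
    System.out.println(CompressionCodecName.UNCOMPRESSED
            .getHadoopCompressionCodecClassName());                         // null, so getCodec() returns null
}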
Example 7: ParquetWriter
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
* Create a new ParquetWriter.
*
* @param file the file to create
* @param writeSupport the implementation to write a record to a RecordConsumer
* @param compressionCodecName the compression codec to use
* @param blockSize the block size threshold
* @param pageSize the page size threshold
* @param dictionaryPageSize the page size threshold for the dictionary pages
* @param enableDictionary to turn dictionary encoding on
* @param validating to turn on validation using the schema
* @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
* @param conf Hadoop configuration to use while accessing the filesystem
* @throws IOException
*/
public ParquetWriter(
        Path file,
        WriteSupport<T> writeSupport,
        CompressionCodecName compressionCodecName,
        int blockSize,
        int pageSize,
        int dictionaryPageSize,
        boolean enableDictionary,
        boolean validating,
        WriterVersion writerVersion,
        Configuration conf) throws IOException {
    this(file, ParquetFileWriter.Mode.CREATE, writeSupport,
            compressionCodecName, blockSize, pageSize, dictionaryPageSize,
            enableDictionary, validating, writerVersion, conf);
}
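For reference, a sketch that calls this constructor directly with the example GroupWriteSupport, following the parameter order documented above; the path, schema string, codec choice, and the openGzipWriter helper are placeholders:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.column.ParquetProperties;
import parquet.example.data.Group;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageTypeParser;

// Hypothetical helper showing the full constructor with explicit sizes.
static ParquetWriter<Group> openGzipWriter(Configuration conf) throws IOException {
    GroupWriteSupport.setSchema(
            MessageTypeParser.parseMessageType("message example { required int32 id; }"), conf);
    return new ParquetWriter<Group>(
            new Path("/tmp/example.parquet"),    // file
            new GroupWriteSupport(),             // writeSupport
            CompressionCodecName.GZIP,           // compressionCodecName
            ParquetWriter.DEFAULT_BLOCK_SIZE,    // blockSize
            ParquetWriter.DEFAULT_PAGE_SIZE,     // pageSize
            ParquetWriter.DEFAULT_PAGE_SIZE,     // dictionaryPageSize
            true,                                // enableDictionary
            false,                               // validating
            ParquetProperties.WriterVersion.PARQUET_1_0,
            conf);
}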
Example 8: testReadWrite
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private void testReadWrite(CompressionCodecName codec, Map<String, String> conf) throws IOException, ClassNotFoundException, InterruptedException {
    runMapReduceJob(codec, conf);
    final BufferedReader in = new BufferedReader(new FileReader(new File(inputPath.toString())));
    final BufferedReader out = new BufferedReader(new FileReader(new File(outputPath.toString(), "part-m-00000")));
    String lineIn;
    String lineOut = null;
    int lineNumber = 0;
    while ((lineIn = in.readLine()) != null && (lineOut = out.readLine()) != null) {
        ++lineNumber;
        lineOut = lineOut.substring(lineOut.indexOf("\t") + 1);
        assertEquals("line " + lineNumber, lineIn, lineOut);
    }
    assertNull("line " + lineNumber, out.readLine());
    assertNull("line " + lineNumber, lineIn);
    in.close();
    out.close();
}
Example 9: runMapReduceJob
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {
    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
        jobConf.setInputFormat(TextInputFormat.class);
        TextInputFormat.addInputPath(jobConf, inputPath);
        jobConf.setNumReduceTasks(0);
        jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
        DeprecatedParquetOutputFormat.setCompression(jobConf, codec);
        DeprecatedParquetOutputFormat.setOutputPath(jobConf, parquetPath);
        DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, GroupWriteSupport.class);
        GroupWriteSupport.setSchema(MessageTypeParser.parseMessageType(writeSchema), jobConf);
        jobConf.setMapperClass(DeprecatedMapper.class);
        mapRedJob = JobClient.runJob(jobConf);
    }
}
Example 10: readDictionaryPage
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private static DictionaryPage readDictionaryPage(byte[] data, ParquetCodecFactory codecFactory, CompressionCodecName codecName)
{
    try {
        ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
        PageHeader pageHeader = Util.readPageHeader(inputStream);
        if (pageHeader.type != PageType.DICTIONARY_PAGE) {
            return null;
        }
        // todo this wrapper is not needed
        BytesInput compressedData = BytesInput.from(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size());
        BytesDecompressor decompressor = codecFactory.getDecompressor(codecName);
        BytesInput decompressed = decompressor.decompress(compressedData, pageHeader.getUncompressed_page_size());
        DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
        Encoding encoding = Encoding.valueOf(dicHeader.getEncoding().name());
        int dictionarySize = dicHeader.getNum_values();
        return new DictionaryPage(decompressed, dictionarySize, encoding);
    }
    catch (IOException ignored) {
        return null;
    }
}
Example 11: getCodec
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
private CompressionCodec getCodec(CompressionCodecName codecName)
{
    String codecClassName = codecName.getHadoopCompressionCodecClassName();
    if (codecClassName == null) {
        return null;
    }
    CompressionCodec codec = codecByName.get(codecClassName);
    if (codec != null) {
        return codec;
    }
    try {
        Class<?> codecClass = Class.forName(codecClassName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, configuration);
        codecByName.put(codecClassName, codec);
        return codec;
    }
    catch (ClassNotFoundException e) {
        throw new RuntimeException("Class " + codecClassName + " was not found", e);
    }
}
Example 12: writeAvro
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Create a Hadoop Job to carry the output configuration
    Job job = Job.getInstance();
    // Set up the Hadoop Output Format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);
    // Register the output; the job runs when the Flink environment is executed
    data.output(hadoopOutputFormat);
}
Example 13: writeThrift
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
public static void writeThrift(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Create a Hadoop Job to carry the output configuration
    Job job = Job.getInstance();
    // Set up the Hadoop Output Format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new ParquetThriftOutputFormat(), job);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);
    ParquetThriftOutputFormat.setThriftClass(job, Person.class);
    // Register the output; the job runs when the Flink environment is executed
    data.output(hadoopOutputFormat);
}
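Both helpers only register a Hadoop output sink; in Flink's DataSet API nothing is written until the execution environment runs. A hedged driver sketch (the writeBoth helper, paths, and job name are assumptions):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;

// Hypothetical driver; "persons" would be produced by an upstream source or map operation.
static void writeBoth(ExecutionEnvironment env, DataSet<Tuple2<Void, Person>> persons) throws Exception {
    writeAvro(persons, "hdfs:///tmp/persons-avro-parquet");
    writeThrift(persons, "hdfs:///tmp/persons-thrift-parquet");
    env.execute("write parquet");   // data.output(...) only registers the sink; execute() runs the job
}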
Example 14: getRecordWriter
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<K, T> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    final Configuration conf = getConfiguration(taskAttemptContext);
    CompressionCodecName codec = getCodec(taskAttemptContext);
    String extension = codec.getExtension() + ".parquet";
    TaskID taskId = taskAttemptContext.getTaskAttemptID().getTaskID();
    Path workPath = ((ParquetMultiOutputCommitter)
            getOutputCommitter(taskAttemptContext)).getWorkPath();
    return getRecordWriter(
            conf, workPath, extension, String.format("%05d", taskId.getId()), codec);
}
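The file suffix is derived from the codec, so different codecs produce distinguishable file names. A quick illustration of getExtension(); the parquetFileSuffix helper is illustrative only and the expected suffixes in the comment should be treated as indicative:

import parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical helper showing how the suffix changes with the codec.
static String parquetFileSuffix(CompressionCodecName codec) {
    return codec.getExtension() + ".parquet";   // e.g. ".snappy.parquet" for SNAPPY, ".parquet" for UNCOMPRESSED
}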
Example 15: TajoParquetWriter
import parquet.hadoop.metadata.CompressionCodecName; // import the required package/class
/**
* Create a new TajoParquetWriter.
*
* @param file The file name to write to.
* @param schema The Tajo schema of the table.
* @param compressionCodecName Compression codec to use, or
* CompressionCodecName.UNCOMPRESSED.
* @param blockSize The block size threshold.
* @param pageSize See parquet write up. Blocks are subdivided into pages
* for alignment.
* @param enableDictionary Whether to use a dictionary to compress columns.
* @param validating Whether to turn on validation.
* @throws IOException
*/
public TajoParquetWriter(Path file,
                         Schema schema,
                         CompressionCodecName compressionCodecName,
                         int blockSize,
                         int pageSize,
                         boolean enableDictionary,
                         boolean validating) throws IOException {
    super(file,
            new TajoWriteSupport(schema),
            compressionCodecName,
            blockSize,
            pageSize,
            enableDictionary,
            validating);
}
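A construction sketch that follows the parameter list documented above; the openWriter helper, path, and codec choice are placeholders, and Schema refers to Tajo's own schema class from the constructor signature:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.metadata.CompressionCodecName;

// Hypothetical helper that opens a writer with the default block and page sizes.
static TajoParquetWriter openWriter(Path file, Schema tableSchema) throws IOException {
    return new TajoParquetWriter(
            file,
            tableSchema,
            CompressionCodecName.UNCOMPRESSED,   // or SNAPPY, GZIP, ...
            ParquetWriter.DEFAULT_BLOCK_SIZE,
            ParquetWriter.DEFAULT_PAGE_SIZE,
            true,                                // enableDictionary
            false);                              // validating
}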