This article collects typical usage examples of the Java method parquet.hadoop.ParquetFileReader.readFooter. If you are wondering what ParquetFileReader.readFooter does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples of its enclosing class, parquet.hadoop.ParquetFileReader.
Below are 8 code examples of ParquetFileReader.readFooter, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
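Before the examples, here is a minimal, self-contained sketch of the typical readFooter call pattern. The class name ReadFooterSketch and the file path are hypothetical, chosen only for illustration; it assumes the pre-Apache parquet.* package namespace used throughout the examples below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.format.converter.ParquetMetadataConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;

public class ReadFooterSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical path; point this at any Parquet file reachable from the default FileSystem.
    Path path = new Path("/tmp/example.parquet");
    Configuration conf = new Configuration();

    // readFooter reads only the file footer (metadata), not the row groups.
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER);

    // The footer exposes the Parquet schema and any key/value metadata written by the producer.
    MessageType schema = footer.getFileMetaData().getSchema();
    System.out.println(schema);
    System.out.println(footer.getFileMetaData().getKeyValueMetaData());
  }
}
Several of the examples below use the two-argument overload readFooter(conf, path) instead; it likewise reads the complete footer.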
Example 1: getSchema
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
@Override
public DatasetJsonRecord getSchema(Path path) throws IOException {
  DatasetJsonRecord record = null;
  if (!fs.exists(path))
    LOG.error("file path : {} not in hdfs", path);
  else {
    try {
      ParquetMetadata readFooter = ParquetFileReader.readFooter(fs.getConf(), path, ParquetMetadataConverter.NO_FILTER);
      Map<String, String> schema = readFooter.getFileMetaData().getKeyValueMetaData();
      String allFields = schema.get("org.apache.spark.sql.parquet.row.metadata");
      FileStatus status = fs.getFileStatus(path);
      String storage = STORAGE_TYPE;
      String abstractPath = path.toUri().getPath();
      String codec = "parquet.codec";
      record = new DatasetJsonRecord(allFields, abstractPath, status.getModificationTime(), status.getOwner(), status.getGroup(),
          status.getPermission().toString(), codec, storage, "");
      LOG.info("parquetfileanalyzer parse path :{},schema is {}", path.toUri().getPath(), record.toCsvString());
    } catch (Exception e) {
      LOG.error("path : {} content " + " is not Parquet File format content ", path.toUri().getPath());
      LOG.info(e.getStackTrace().toString());
    }
  }
  return record;
}
Example 2: convertParquetSchemaToKettleWithTwoValidRows
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
@Test
public void convertParquetSchemaToKettleWithTwoValidRows() throws Exception {
  int pentahoValueMetaTypeFirstRow = 2;
  boolean allowNullFirstRow = false;
  int pentahoValueMetaTypeSecondRow = 5;
  boolean allowNullSecondRow = false;
  String expectedKettleSchema = ParquetUtils
      .createSchema( pentahoValueMetaTypeFirstRow, allowNullFirstRow, pentahoValueMetaTypeSecondRow,
          allowNullSecondRow ).marshall();
  urlTestResources = Thread.currentThread().getContextClassLoader().getResource( PARQUET_FILE );
  ConfigurationProxy conf = new ConfigurationProxy();
  conf.set( "fs.defaultFS", "file:///" );
  ParquetMetadata meta = ParquetFileReader
      .readFooter( conf, new Path( Paths.get( urlTestResources.toURI() ).toString() ),
          ParquetMetadataConverter.NO_FILTER );
  MessageType schema = meta.getFileMetaData().getSchema();
  SchemaDescription kettleSchema = ParquetConverter.createSchemaDescription( schema );
  String marshallKettleSchema = kettleSchema.marshall();
  Assert.assertEquals( marshallKettleSchema, expectedKettleSchema );
}
Example 3: execute
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  Configuration conf = new Configuration();
  ParquetMetadata metaData = ParquetFileReader.readFooter(conf, new Path(input));
  PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withAutoCrop()
      .withWhitespaceHandler(WhiteSpaceHandler.COLLAPSE_WHITESPACE)
      .withColumnPadding(1)
      .build();
  MetadataUtils.showDetails(out, metaData);
  out.flushColumns();
}
Example 4: getMetadatas
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
public static ParquetMetadata[] getMetadatas (FileStatus[] fileStatuses, Configuration conf) throws IOException
{
  ParquetMetadata[] res = new ParquetMetadata[fileStatuses.length];
  for (int i = 0; i < fileStatuses.length; ++i)
  {
    res[i] = ParquetFileReader.readFooter(conf, fileStatuses[i].getPath(), NO_FILTER);
  }
  return res;
}
Example 5: getDatasetDescriptorFromParquetFile
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {
  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least one.
    if (files.size() > 0) {
      break;
    }
  }
  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
Example 6: execute
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  Configuration conf = new Configuration();
  Path inpath = new Path(input);
  ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
  MessageType schema = metaData.getFileMetaData().getSchema();
  PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withAutoCrop()
      .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
      .withColumnPadding(1)
      .withMaxBufferedLines(1000000)
      .withFlushOnTab()
      .build();
  boolean showmd = !options.hasOption('m');
  boolean showdt = !options.hasOption('d');
  Set<String> showColumns = null;
  if (options.hasOption('c')) {
    String[] cols = options.getOptionValues('c');
    showColumns = new HashSet<String>(Arrays.asList(cols));
  }
  dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}
Example 7: ParquetFileMetadata
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
public ParquetFileMetadata(Configuration conf, Path hdfsFilePath) throws IOException
{
  this.metaData = ParquetFileReader.readFooter(conf, hdfsFilePath, NO_FILTER);
}
Example 8: run
import parquet.hadoop.ParquetFileReader; // import the package/class on which the method depends
public int run(String[] args) throws Exception {
  if(args.length < 2) {
    LOG.error("Usage: " + getClass().getName() + " INPUTFILE OUTPUTFILE [compression]");
    return 1;
  }
  String inputFile = args[0];
  String outputFile = args[1];
  String compression = (args.length > 2) ? args[2] : "none";

  Path parquetFilePath = null;
  // Find a file in case a directory was passed
  RemoteIterator<LocatedFileStatus> it = FileSystem.get(getConf()).listFiles(new Path(inputFile), true);
  while(it.hasNext()) {
    FileStatus fs = it.next();
    if(fs.isFile()) {
      parquetFilePath = fs.getPath();
      break;
    }
  }
  if(parquetFilePath == null) {
    LOG.error("No file found for " + inputFile);
    return 1;
  }
  LOG.info("Getting schema from " + parquetFilePath);
  ParquetMetadata readFooter = ParquetFileReader.readFooter(getConf(), parquetFilePath);
  MessageType schema = readFooter.getFileMetaData().getSchema();
  LOG.info(schema);
  GroupWriteSupport.setSchema(schema, getConf());

  Job job = new Job(getConf());
  job.setJarByClass(getClass());
  job.setJobName(getClass().getName());
  job.setMapperClass(ReadRequestMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(ExampleInputFormat.class);
  job.setOutputFormatClass(ExampleOutputFormat.class);

  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
  if(compression.equalsIgnoreCase("snappy")) {
    codec = CompressionCodecName.SNAPPY;
  } else if(compression.equalsIgnoreCase("gzip")) {
    codec = CompressionCodecName.GZIP;
  }
  LOG.info("Output compression: " + codec);
  ExampleOutputFormat.setCompression(job, codec);

  FileInputFormat.setInputPaths(job, new Path(inputFile));
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  job.waitForCompletion(true);
  return 0;
}