

Java ParquetFileReader.readFooter Method Code Examples

This article collects typical usage examples of the Java method parquet.hadoop.ParquetFileReader.readFooter. If you have been wondering what ParquetFileReader.readFooter does, or how and where to use it, the curated code examples below may help. You can also explore further usage examples of the containing class, parquet.hadoop.ParquetFileReader.


Eight code examples of ParquetFileReader.readFooter are shown below, ordered by popularity by default.
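Before the individual examples, here is a minimal, self-contained sketch of the core call. It assumes the pre-Apache parquet-mr artifact with the parquet.* package namespace (which all the examples below use), and the input path and class name are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.format.converter.ParquetMetadataConverter;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.MessageType;

public class ReadFooterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical path; point this at a real Parquet file.
        Path path = new Path("/tmp/example.parquet");

        // NO_FILTER reads the complete footer, including all row-group metadata.
        // Examples 3, 6, and 8 below use the two-argument overload, which behaves the same.
        ParquetMetadata footer =
            ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER);

        // The schema and key/value metadata live in the footer's FileMetaData block.
        MessageType schema = footer.getFileMetaData().getSchema();
        System.out.println(schema);
        System.out.println(footer.getFileMetaData().getKeyValueMetaData());
    }
}

As the examples show, the returned ParquetMetadata is the usual entry point for inspecting a file's schema (getFileMetaData().getSchema()) and its application-specific key/value metadata without reading any row data.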

Example 1: getSchema

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
@Override
public DatasetJsonRecord getSchema(Path path) throws IOException {
    DatasetJsonRecord record = null;
    if (!fs.exists(path))
        LOG.error("file path : {} not in hdfs", path);
    else {
        try {
            ParquetMetadata readFooter = ParquetFileReader.readFooter(fs.getConf(), path, ParquetMetadataConverter.NO_FILTER);
            Map<String, String> schema = readFooter.getFileMetaData().getKeyValueMetaData();
            String allFields = schema.get("org.apache.spark.sql.parquet.row.metadata");
            FileStatus status = fs.getFileStatus(path);
            String storage = STORAGE_TYPE;
            String abstractPath = path.toUri().getPath();
            String codec = "parquet.codec";
            record = new DatasetJsonRecord(allFields, abstractPath, status.getModificationTime(), status.getOwner(), status.getGroup(),
                    status.getPermission().toString(), codec, storage, "");
            LOG.info("parquetfileanalyzer parse path :{},schema is {}", path.toUri().getPath(), record.toCsvString());

        } catch (Exception e) {
            LOG.error("path : {} content " + " is not Parquet File format content  ", path.toUri().getPath());
            LOG.info(e.getStackTrace().toString());
        }
    }
    return record;

}
 
Author: thomas-young-2013, Project: wherehowsX, Lines: 27, Source: ParquetFileAnalyzer.java

Example 2: convertParquetSchemaToKettleWithTwoValidRows

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
@Test
public void convertParquetSchemaToKettleWithTwoValidRows() throws Exception {

  int pentahoValueMetaTypeFirstRow = 2;
  boolean allowNullFirstRow = false;
  int pentahoValueMetaTypeSecondRow = 5;
  boolean allowNullSecondRow = false;

  String expectedKettleSchema = ParquetUtils
    .createSchema( pentahoValueMetaTypeFirstRow, allowNullFirstRow, pentahoValueMetaTypeSecondRow,
      allowNullSecondRow ).marshall();

  urlTestResources = Thread.currentThread().getContextClassLoader().getResource( PARQUET_FILE );

  ConfigurationProxy conf = new ConfigurationProxy();
  conf.set( "fs.defaultFS", "file:///" );
  ParquetMetadata meta = ParquetFileReader
    .readFooter( conf, new Path( Paths.get( urlTestResources.toURI() ).toString() ),
      ParquetMetadataConverter.NO_FILTER );
  MessageType schema = meta.getFileMetaData().getSchema();

  SchemaDescription kettleSchema = ParquetConverter.createSchemaDescription( schema );
  String marshallKettleSchema = kettleSchema.marshall();
  Assert.assertEquals( marshallKettleSchema, expectedKettleSchema );
}
 
Author: pentaho, Project: pentaho-hadoop-shims, Lines: 26, Source: ParquetConverterTest.java

Example 3: execute

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String[] args = options.getArgs();
  String input = args[0];
  
  Configuration conf = new Configuration();
  ParquetMetadata metaData = ParquetFileReader.readFooter(conf, new Path(input));

  PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
                                           .withAutoColumn()
                                           .withAutoCrop()
                                           .withWhitespaceHandler(WhiteSpaceHandler.COLLAPSE_WHITESPACE)
                                           .withColumnPadding(1)
                                           .build();

  MetadataUtils.showDetails(out, metaData);
  out.flushColumns();
}
 
Author: wesleypeck, Project: parquet-tools, Lines: 21, Source: ShowMetaCommand.java

Example 4: getMetadatas

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
public static ParquetMetadata[] getMetadatas (FileStatus[] fileStatuses, Configuration conf) throws IOException
{
    ParquetMetadata[] res = new ParquetMetadata[fileStatuses.length];
    for (int i = 0; i < fileStatuses.length; ++i)
    {
        res[i] = ParquetFileReader.readFooter(conf, fileStatuses[i].getPath(), NO_FILTER);
    }
    return res;
}
 
Author: dbiir, Project: rainbow, Lines: 10, Source: LocalParquetEvaluator.java

Example 5: getDatasetDescriptorFromParquetFile

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Author: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 32, Source: HdfsOdpsImportJob.java

Example 6: execute

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);

    String[] args = options.getArgs();
    String input = args[0];

    Configuration conf = new Configuration();
    Path inpath = new Path(input);

    ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
    MessageType schema = metaData.getFileMetaData().getSchema();

    PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
                                             .withAutoColumn()
                                             .withAutoCrop()
                                             .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
                                             .withColumnPadding(1)
                                             .withMaxBufferedLines(1000000)
                                             .withFlushOnTab()
                                             .build();

    boolean showmd = !options.hasOption('m');
    boolean showdt = !options.hasOption('d');

    Set<String> showColumns = null;
    if (options.hasOption('c')) {
        String[] cols = options.getOptionValues('c');
        showColumns = new HashSet<String>(Arrays.asList(cols));
    }

    dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}
 
Author: wesleypeck, Project: parquet-tools, Lines: 34, Source: DumpCommand.java

Example 7: ParquetFileMetadata

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
public ParquetFileMetadata(Configuration conf, Path hdfsFilePath) throws IOException
{
    this.metaData = ParquetFileReader.readFooter(conf, hdfsFilePath, NO_FILTER);
}
 
Author: dbiir, Project: rainbow, Lines: 5, Source: ParquetFileMetadata.java

Example 8: run

import parquet.hadoop.ParquetFileReader; // import the class that declares the method
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        LOG.error("Usage: " + getClass().getName() + " INPUTFILE OUTPUTFILE [compression]");
        return 1;
    }
    String inputFile = args[0];
    String outputFile = args[1];
    String compression = (args.length > 2) ? args[2] : "none";

    Path parquetFilePath = null;
    // Find a file in case a directory was passed
    RemoteIterator<LocatedFileStatus> it = FileSystem.get(getConf()).listFiles(new Path(inputFile), true);
    while (it.hasNext()) {
        FileStatus fs = it.next();
        if (fs.isFile()) {
            parquetFilePath = fs.getPath();
            break;
        }
    }
    if (parquetFilePath == null) {
        LOG.error("No file found for " + inputFile);
        return 1;
    }
    LOG.info("Getting schema from " + parquetFilePath);
    ParquetMetadata readFooter = ParquetFileReader.readFooter(getConf(), parquetFilePath);
    MessageType schema = readFooter.getFileMetaData().getSchema();
    LOG.info(schema);
    GroupWriteSupport.setSchema(schema, getConf());

    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(ExampleInputFormat.class);
    job.setOutputFormatClass(ExampleOutputFormat.class);

    CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
    if (compression.equalsIgnoreCase("snappy")) {
        codec = CompressionCodecName.SNAPPY;
    } else if (compression.equalsIgnoreCase("gzip")) {
        codec = CompressionCodecName.GZIP;
    }
    LOG.info("Output compression: " + codec);
    ExampleOutputFormat.setCompression(job, codec);

    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));

    job.waitForCompletion(true);

    return 0;
}
 
Author: cloudera, Project: parquet-examples, Lines: 54, Source: TestReadWriteParquet.java


Note: The parquet.hadoop.ParquetFileReader.readFooter examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their authors, who retain the copyright; consult the corresponding project's License before using or redistributing the code, and do not republish without permission.