

Java OrcFile Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.io.orc.OrcFile. If you are wondering how the OrcFile class is used in practice, or are looking for working OrcFile examples, the selected code samples below may help.


The OrcFile class belongs to the org.apache.hadoop.hive.ql.io.orc package. A total of 15 OrcFile code examples are shown below, sorted by popularity by default.
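
Before the individual examples, here is a minimal, self-contained sketch of the two OrcFile entry points that most of them rely on: OrcFile.createWriter with writerOptions for writing rows, and OrcFile.createReader with readerOptions for reading them back. The class name, file path, and struct<a:string> schema below are placeholders chosen for illustration; the OrcFile, Writer, Reader, and RecordReader calls themselves are the ones that appear in the examples that follow (notably examples 9 and 10).

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcFileSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/orcfile-sketch.orc"); // placeholder path

    // Write: build an ObjectInspector from a Hive type string, then create a Writer.
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:string>");
    ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).inspector(inspector));
    writer.addRow(Arrays.asList("hello"));
    writer.close();

    // Read: create a Reader, then iterate over the rows through a RecordReader.
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    Object row = null;
    while (rows.hasNext()) {
      row = rows.next(row);
      System.out.println(row);
    }
    rows.close();
  }
}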

Example 1: getSchema

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
        throws IOException {
    DatasetJsonRecord datasetJsonRecord = null;
    try {
        Reader orcReader = OrcFile.createReader(fs, targetFilePath);
        String codec = String.valueOf(orcReader.getCompression());
        String schemaString = orcReader.getObjectInspector().getTypeName();
        String storage = STORAGE_TYPE;
        String abstractPath = targetFilePath.toUri().getPath();
        FileStatus fstat = fs.getFileStatus(targetFilePath);
        datasetJsonRecord =
                new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(), fstat.getGroup(),
                        fstat.getPermission().toString(), codec, storage, "");
    } catch (Exception e) {
        LOG.error("path : {} content " + " is not ORC File format content  ",targetFilePath.toUri().getPath());
        LOG.info(e.getStackTrace().toString());
    }

    return datasetJsonRecord;
}
 
Developer: thomas-young-2013, Project: wherehowsX, Lines: 22, Source: OrcFileAnalyzer.java

Example 2: getSampleData

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
        throws IOException {
    SampleDataRecord sampleDataRecord = null;
    try {
        Reader orcReader = OrcFile.createReader(fs, targetFilePath);
        RecordReader recordReader = orcReader.rows();
        int count = 0;
        List<Object> list = new ArrayList<Object>();
        Object row = null;
        while (recordReader.hasNext() && count < 10) {
            count++;
            row = recordReader.next(row);
            list.add(row.toString().replaceAll("[\\n\\r\\p{C}]", ""));
        }
        sampleDataRecord = new SampleDataRecord(targetFilePath.toUri().getPath(), list);
    } catch (Exception e) {
        LOG.error("path : {} content " + " is not ORC File format content  ",targetFilePath.toUri().getPath());
        LOG.info(e.getStackTrace().toString());
    }
    return sampleDataRecord;
}
 
Developer: thomas-young-2013, Project: wherehowsX, Lines: 23, Source: OrcFileAnalyzer.java

Example 3: getORCRecords

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName) throws IOException {
  List<OrcStruct> orcrecords = new ArrayList<>();
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        final org.apache.hadoop.hive.ql.io.orc.Reader fis =
            OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
        RecordReader rows = fis.rows();
        while (rows.hasNext()) {
          orcrecords.add((OrcStruct) rows.next(null));
        }
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return orcrecords;
}
 
Developer: ampool, Project: monarch, Lines: 26, Source: HDFSQuasiService.java

Example 4: testWriteOrcBytesAndReadAsFile

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Convert rows into ORC bytes and write them to a file, then read the file back using the ORC
 * file reader and assert the expected values.
 *
 * @throws IOException if not able to write or read the data
 */
@Test
public void testWriteOrcBytesAndReadAsFile() throws IOException {
  final File file = new File("/tmp/tmp_1.orc");
  file.deleteOnExit();

  final OrcFile.WriterOptions wOpts =
      OrcFile.writerOptions(new Configuration()).inspector(ROW_OI);
  final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());

  final AWriter aWriter = OrcUtils.createWriter(wOpts);
  writeUsingOrcWriter(aWriter);
  final byte[] memOrcBytes = aWriter.getBytes();

  FileUtils.writeByteArrayToFile(file, memOrcBytes);

  readAndAssertUsingReader(OrcFile.createReader(new Path(file.toString()), rOpts).rows());
}
 
Developer: ampool, Project: monarch, Lines: 24, Source: AOrcReaderWriterTest.java

Example 5: testWriteFileAndReadAsBytes

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Write an ORC file using the generic file-based ORC writer, then read the file back as bytes
 * and assert the contents using the in-memory (byte-array) based reader.
 *
 * @throws IOException if not able to write or read the data
 */
@Test
public void testWriteFileAndReadAsBytes() throws IOException {
  final File file = new File("/tmp/tmp_2.orc");
  file.deleteOnExit();

  final OrcFile.WriterOptions wOpts =
      OrcFile.writerOptions(new Configuration()).inspector(ROW_OI);
  final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());

  final Writer writer = OrcFile.createWriter(new Path(file.toString()), wOpts);
  writeUsingOrcWriter(writer);
  final byte[] memOrcBytes = FileUtils.readFileToByteArray(file);

  readAndAssertUsingReader(OrcUtils.createReader(memOrcBytes, rOpts).rows());
}
 
Developer: ampool, Project: monarch, Lines: 22, Source: AOrcReaderWriterTest.java

Example 6: testWithPredicates

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Test to verify that, with predicates (filters) pushed down to the ORC reader layer, the reader
 * returns all rows from the row groups in which a matching row may be found. It does not return
 * only the specific matching rows, because statistics are maintained per row group.
 *
 * @param f the filters to be evaluated
 * @param expectedCount expected number of rows returned by ORC reader
 * @throws IOException in case there was an error reading/writing
 */
@Test
@Parameters(method = "dataWithPredicates")
public void testWithPredicates(final Filter f, final int expectedCount) throws IOException {
  final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());

  final OrcUtils.OrcOptions orcOptions = new OrcUtils.OrcOptions(f, TD);

  final RecordReader rows = OrcUtils.createReader(ORC_BYTES_WITH_MULTIPLE_STRIDES, rOpts)
      .rowsOptions(orcOptions.getOptions());
  int count = 0;
  Object row = null;
  while (rows.hasNext()) {
    row = rows.next(row);
    count++;
  }
  rows.close();
  assertEquals("Incorrect number of rows retrieved from scan.", expectedCount, count);
}
 
Developer: ampool, Project: monarch, Lines: 28, Source: AOrcReaderWriterTest.java

Example 7: getSchema

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
  throws IOException {
  Reader orcReader = OrcFile.createReader(fs, targetFilePath);
  String codec = String.valueOf(orcReader.getCompression());
  String schemaString = orcReader.getObjectInspector().getTypeName();
  String storage = STORAGE_TYPE;
  String abstractPath = targetFilePath.toUri().getPath();

  FileStatus fstat = fs.getFileStatus(targetFilePath);
  DatasetJsonRecord datasetJsonRecord =
    new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(), fstat.getGroup(),
      fstat.getPermission().toString(), codec, storage, "");

  return datasetJsonRecord;
}
 
Developer: linkedin, Project: WhereHows, Lines: 17, Source: OrcFileAnalyzer.java

Example 8: getSampleData

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
  throws IOException {
  Reader orcReader = OrcFile.createReader(fs, targetFilePath);
  RecordReader recordReader = orcReader.rows();
  int count = 0;
  List<Object> list = new ArrayList<Object>();
  Object row = null;
  while (recordReader.hasNext() && count < 10) {
    count++;
    row = recordReader.next(row);
    list.add(row.toString().replaceAll("[\\n\\r\\p{C}]", ""));
  }
  SampleDataRecord sampleDataRecord = new SampleDataRecord(targetFilePath.toUri().getPath(), list);
  return sampleDataRecord;
}
 
Developer: linkedin, Project: WhereHows, Lines: 17, Source: OrcFileAnalyzer.java

Example 9: typical

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void typical() throws IOException {
  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:string>");
  ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
  WriterOptions options = OrcFile.writerOptions(conf).inspector(inspector);

  Path path = new Path(temporaryFolder.getRoot().getCanonicalPath(), "part-00000");

  Writer writer = OrcFile.createWriter(path, options);
  writer.addRow(Arrays.asList("hello"));
  writer.close();

  try (OrcReader reader = new OrcReader(conf, path)) {
    List<Object> next = reader.next();
    assertThat(next.size(), is(1));
    assertThat(next.get(0), is((Object) "hello"));
    assertThat(reader.hasNext(), is(false));
  }

}
 
Developer: HotelsDotCom, Project: corc, Lines: 21, Source: OrcReaderTest.java

Example 10: typical

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void typical() throws IOException {
  Path path = new Path(temporaryFolder.getRoot().getCanonicalPath(), "part-00000");

  try (OrcWriter writer = new OrcWriter.Builder(conf, path).addField("a", TypeInfoFactory.stringTypeInfo).build()) {
    writer.addRow("hello");
  }

  ReaderOptions options = OrcFile.readerOptions(conf);
  Reader reader = OrcFile.createReader(path, options);
  RecordReader rows = reader.rows();

  @SuppressWarnings("unchecked")
  List<Object> next = (List<Object>) ObjectInspectorUtils.copyToStandardJavaObject(rows.next(null),
      reader.getObjectInspector());
  assertThat(next.size(), is(1));
  assertThat(next.get(0), is((Object) "hello"));
  assertThat(rows.hasNext(), is(false));

  rows.close();
}
 
Developer: HotelsDotCom, Project: corc, Lines: 22, Source: OrcWriterTest.java

Example 11: getShipFiles

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public List<String> getShipFiles() {
    List<String> cacheFiles = new ArrayList<String>();
    String hadoopVersion = "20S";
    if (Utils.isHadoop23() || Utils.isHadoop2()) {
        hadoopVersion = "23";
    }
    Class hadoopVersionShimsClass;
    try {
        hadoopVersionShimsClass = Class.forName("org.apache.hadoop.hive.shims.Hadoop" +
                hadoopVersion + "Shims");
    } catch (ClassNotFoundException e) {
        throw new RuntimeException("Cannot find Hadoop" + hadoopVersion + "ShimsClass in classpath");
    }
    Class[] classList = new Class[] {OrcFile.class, HiveConf.class, AbstractSerDe.class,
            org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, hadoopVersionShimsClass,
            Input.class};
    return FuncUtils.getShipFiles(classList);
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 20, Source: OrcStorage.java

Example 12: testMultiStore

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void testMultiStore() throws Exception {
    pigServer.setBatchOn();
    pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
    pigServer.registerQuery("B = order A by a0;");
    pigServer.registerQuery("store B into '" + OUTPUT2 + "' using OrcStorage();");
    pigServer.registerQuery("store B into '" + OUTPUT3 +"' using OrcStorage('-c SNAPPY');");
    pigServer.executeBatch();

    Path outputFilePath = new Path(new Path(OUTPUT2), "part-r-00000");
    Reader reader = OrcFile.createReader(fs, outputFilePath);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.ZLIB);

    Path outputFilePath2 = new Path(new Path(OUTPUT3), "part-r-00000");
    reader = OrcFile.createReader(fs, outputFilePath2);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.SNAPPY);

    verifyData(outputFilePath, outputFilePath2, fs, 2);
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 22, Source: TestOrcStorage.java

Example 13: OrcStreamWriter

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public OrcStreamWriter( final Configuration config, final Path path, final String schema ) throws IOException{
  FileSystem fs = FileSystem.get(config);
  long stripeSize = HiveConf.getLongVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE);
  CompressionKind compress = CompressionKind.valueOf(HiveConf.getVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS));
  int bufferSize = HiveConf.getIntVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE);
  int rowIndexStride =  HiveConf.getIntVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);

  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( schema );
  ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( typeInfo );
  writer = OrcFile.createWriter( fs, path, config, inspector, stripeSize, compress, bufferSize, rowIndexStride );
  formatter = OrcFormatterFactory.get( typeInfo );
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 13, Source: OrcStreamWriter.java
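
The OrcStreamWriter constructor above uses the long positional-argument form of OrcFile.createWriter. As a hedged alternative sketch — assuming a Hive version whose OrcFile.WriterOptions builder exposes fileSystem, stripeSize, compress, bufferSize, and rowIndexStride setters — the same settings can be expressed through writerOptions, the style used in examples 4, 5, and 9. The helper name and the literal tuning values below are illustrative only; the example above reads them from HiveConf.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class OrcWriterOptionsSketch {
  // Hypothetical helper: builds a Writer with explicit tuning via the WriterOptions builder
  // instead of the positional-argument createWriter overload.
  public static Writer createTunedWriter(Configuration config, FileSystem fs, Path path,
      ObjectInspector inspector) throws IOException {
    OrcFile.WriterOptions options = OrcFile.writerOptions(config)
        .fileSystem(fs)                    // write through the supplied FileSystem
        .inspector(inspector)              // row ObjectInspector, e.g. built from a type string
        .stripeSize(64L * 1024 * 1024)     // illustrative stripe size
        .compress(CompressionKind.ZLIB)    // illustrative compression codec
        .bufferSize(256 * 1024)            // illustrative buffer size
        .rowIndexStride(10000);            // illustrative row index stride
    return OrcFile.createWriter(path, options);
  }
}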

Example 14: getOrcObjectInspector

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public static ObjectInspector getOrcObjectInspector( final Configuration config , final Path path ) throws IOException{
  Random rnd = new Random();

  FileSystem fs = FileSystem.get( config );
  FileStatus[] fsStatus = fs.listStatus( path );
  Path sampleOrcPath = fsStatus[rnd.nextInt( fsStatus.length )].getPath();
  Reader reader = OrcFile.createReader( sampleOrcPath , OrcFile.readerOptions( config ) );
  return reader.getObjectInspector();
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 10, Source: OrcSchemaUtil.java

Example 15: dumpOrcRecordsFromFile

import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
private static void dumpOrcRecordsFromFile(File file) throws IOException {
  StringBuilder metaInfo = new StringBuilder(1024);

  metaInfo.append("File: " + file.getAbsolutePath());
  Reader reader = null;
  try {
    reader = OrcFile.createReader(new Path(file.toPath().toString()),
        OrcFile.readerOptions(new Configuration()));
  } catch (Exception e) {
    System.err.println("Exception caught for file: " + file);
    e.printStackTrace();
  }
  if (reader == null) {
    // Reader creation failed above; avoid a NullPointerException on reader.rows() and stop here.
    return;
  }
  StoreRecord record = null;
  int numRecords = 0;
  RecordReader rows = reader.rows();
  Object row = null;
  while (rows.hasNext()) {
    numRecords++;
    record = (StoreRecord) rows.next(row);
    if (dumpData) {
      StringBuilder dataInfo = new StringBuilder(1024000);
      dataInfo.append("\nRecord " + numRecords + ":" + record);
      System.out.println(dataInfo.toString());
    }
  }
}
 
Developer: ampool, Project: monarch, Lines: 29, Source: OrcFileDump.java


Note: The org.apache.hadoop.hive.ql.io.orc.OrcFile class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and the copyright of the source code remains with the original authors. Please refer to the corresponding project's License before distributing or using the code; do not reproduce this article without permission.