This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.io.orc.OrcFile. If you are unsure what the OrcFile class is for or how to use it, the curated examples below should help.
The OrcFile class belongs to the org.apache.hadoop.hive.ql.io.orc package. 15 code examples of the OrcFile class are shown below, ordered roughly by popularity.
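Before the individual examples, here is a minimal, self-contained sketch of the two entry points that most of the examples below build on: OrcFile.writerOptions()/createWriter() for writing and OrcFile.readerOptions()/createReader() for reading. It assumes the pre-Hive-2 org.apache.hadoop.hive.ql.io.orc API used throughout this article; the path /tmp/example.orc and the struct<a:string> schema are made up purely for illustration.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcFileQuickStart {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.orc");   // illustrative path

        // Writing: OrcFile.writerOptions(conf) plus an ObjectInspector describing the row type.
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:string>");
        ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
        Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).inspector(inspector));
        writer.addRow(Arrays.asList("hello"));
        writer.close();

        // Reading: OrcFile.readerOptions(conf) yields a Reader, whose rows() gives a RecordReader.
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        Object row = null;
        while (rows.hasNext()) {
            row = rows.next(row);
            System.out.println(row);
        }
        rows.close();
    }
}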
Example 1: getSchema
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
        throws IOException {
    DatasetJsonRecord datasetJsonRecord = null;
    try {
        Reader orcReader = OrcFile.createReader(fs, targetFilePath);
        String codec = String.valueOf(orcReader.getCompression());
        String schemaString = orcReader.getObjectInspector().getTypeName();
        String storage = STORAGE_TYPE;
        String abstractPath = targetFilePath.toUri().getPath();
        FileStatus fstat = fs.getFileStatus(targetFilePath);
        datasetJsonRecord =
                new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(),
                        fstat.getGroup(), fstat.getPermission().toString(), codec, storage, "");
    } catch (Exception e) {
        // Log the failing path together with the stack trace; the original
        // e.getStackTrace().toString() only prints an array reference.
        LOG.error("path: {} is not in ORC file format", targetFilePath.toUri().getPath(), e);
    }
    return datasetJsonRecord;
}
Example 2: getSampleData
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
        throws IOException {
    SampleDataRecord sampleDataRecord = null;
    try {
        Reader orcReader = OrcFile.createReader(fs, targetFilePath);
        RecordReader recordReader = orcReader.rows();
        int count = 0;
        List<Object> list = new ArrayList<Object>();
        Object row = null;
        // Sample at most the first 10 rows, stripping line breaks and control characters.
        while (recordReader.hasNext() && count < 10) {
            count++;
            row = recordReader.next(row);
            list.add(row.toString().replaceAll("[\\n\\r\\p{C}]", ""));
        }
        recordReader.close();
        sampleDataRecord = new SampleDataRecord(targetFilePath.toUri().getPath(), list);
    } catch (Exception e) {
        // Log the failing path together with the stack trace instead of e.getStackTrace().toString().
        LOG.error("path: {} is not in ORC file format", targetFilePath.toUri().getPath(), e);
    }
    return sampleDataRecord;
}
Example 3: getORCRecords
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName) throws IOException {
    List<OrcStruct> orcrecords = new ArrayList<>();
    try {
        FileSystem fs = FileSystem.get(conf);
        Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
        Path tablePath = new Path(storeBasePath, tableName);
        if (fs.exists(tablePath)) {
            RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
                    fs.listFiles(tablePath, false);
            while (locatedFileStatusRemoteIterator.hasNext()) {
                LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
                final org.apache.hadoop.hive.ql.io.orc.Reader fis =
                        OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
                RecordReader rows = fis.rows();
                while (rows.hasNext()) {
                    orcrecords.add((OrcStruct) rows.next(null));
                }
                // Close the record reader before moving on to the next file.
                rows.close();
                System.out.println("File name is " + next.getPath());
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return orcrecords;
}
Example 4: testWriteOrcBytesAndReadAsFile
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Convert rows into ORC bytes and write them to a file. Then read the file using the ORC file
 * reader and assert the expected values.
 *
 * @throws IOException if not able to write or read the data
 */
@Test
public void testWriteOrcBytesAndReadAsFile() throws IOException {
    final File file = new File("/tmp/tmp_1.orc");
    file.deleteOnExit();
    final OrcFile.WriterOptions wOpts =
            OrcFile.writerOptions(new Configuration()).inspector(ROW_OI);
    final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());
    final AWriter aWriter = OrcUtils.createWriter(wOpts);
    writeUsingOrcWriter(aWriter);
    final byte[] memOrcBytes = aWriter.getBytes();
    FileUtils.writeByteArrayToFile(file, memOrcBytes);
    readAndAssertUsingReader(OrcFile.createReader(new Path(file.toString()), rOpts).rows());
}
Example 5: testWriteFileAndReadAsBytes
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Write an ORC file using the generic file-based ORC writer, then read the file's bytes back and
 * assert the contents using the in-memory (byte-array) based reader.
 *
 * @throws IOException if not able to write or read the data
 */
@Test
public void testWriteFileAndReadAsBytes() throws IOException {
    final File file = new File("/tmp/tmp_2.orc");
    file.deleteOnExit();
    final OrcFile.WriterOptions wOpts =
            OrcFile.writerOptions(new Configuration()).inspector(ROW_OI);
    final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());
    final Writer writer = OrcFile.createWriter(new Path(file.toString()), wOpts);
    writeUsingOrcWriter(writer);
    final byte[] memOrcBytes = FileUtils.readFileToByteArray(file);
    readAndAssertUsingReader(OrcUtils.createReader(memOrcBytes, rOpts).rows());
}
Example 6: testWithPredicates
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
/**
 * Test to verify that with predicates (filters) pushed down to the ORC reader layer, the reader
 * returns all rows from each row group in which a matching row may be found. It does not return
 * only the matching rows, because ORC statistics are maintained per row group.
 *
 * @param f the filters to be evaluated
 * @param expectedCount expected number of rows returned by the ORC reader
 * @throws IOException in case there was an error reading/writing
 */
@Test
@Parameters(method = "dataWithPredicates")
public void testWithPredicates(final Filter f, final int expectedCount) throws IOException {
    final OrcFile.ReaderOptions rOpts = new OrcFile.ReaderOptions(new Configuration());
    final OrcUtils.OrcOptions orcOptions = new OrcUtils.OrcOptions(f, TD);
    final RecordReader rows = OrcUtils.createReader(ORC_BYTES_WITH_MULTIPLE_STRIDES, rOpts)
            .rowsOptions(orcOptions.getOptions());
    int count = 0;
    Object row = null;
    while (rows.hasNext()) {
        row = rows.next(row);
        count++;
    }
    rows.close();
    assertEquals("Incorrect number of rows retrieved from scan.", expectedCount, count);
}
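The Filter and OrcUtils.OrcOptions types in Example 6 are project-specific wrappers. In the plain Hive ql.io.orc API, predicate pushdown is normally expressed through a SearchArgument attached to Reader.Options. The following is only a rough sketch, assuming the Hive 1.x-era builder signature equals(String, Object) (later versions add a PredicateLeaf.Type parameter); the class name, the path /tmp/example.orc, the long column named a, and the column-name array passed to searchArgument() are all illustrative and may vary by Hive version.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class OrcPredicatePushdownSketch {
    public static void main(String[] args) throws Exception {
        // Build a predicate "a = 5". ORC evaluates it against per-row-group statistics, so row
        // groups that cannot contain a match are skipped entirely, while every row of a
        // qualifying row group is still returned, as described in Example 6 above.
        SearchArgument sarg = SearchArgumentFactory.newBuilder()
                .startAnd()
                .equals("a", 5L)   // illustrative column name and value
                .end()
                .build();
        Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),   // illustrative path
                OrcFile.readerOptions(new Configuration()));
        RecordReader rows = reader.rowsOptions(
                new Reader.Options().searchArgument(sarg, new String[] { null, "a" }));
        Object row = null;
        int count = 0;
        while (rows.hasNext()) {
            row = rows.next(row);
            count++;
        }
        rows.close();
        System.out.println("rows returned: " + count);
    }
}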
Example 7: getSchema
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
        throws IOException {
    Reader orcReader = OrcFile.createReader(fs, targetFilePath);
    String codec = String.valueOf(orcReader.getCompression());
    String schemaString = orcReader.getObjectInspector().getTypeName();
    String storage = STORAGE_TYPE;
    String abstractPath = targetFilePath.toUri().getPath();
    FileStatus fstat = fs.getFileStatus(targetFilePath);
    DatasetJsonRecord datasetJsonRecord =
            new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(),
                    fstat.getGroup(), fstat.getPermission().toString(), codec, storage, "");
    return datasetJsonRecord;
}
Example 8: getSampleData
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
        throws IOException {
    Reader orcReader = OrcFile.createReader(fs, targetFilePath);
    RecordReader recordReader = orcReader.rows();
    int count = 0;
    List<Object> list = new ArrayList<Object>();
    Object row = null;
    // Sample at most the first 10 rows, stripping line breaks and control characters.
    while (recordReader.hasNext() && count < 10) {
        count++;
        row = recordReader.next(row);
        list.add(row.toString().replaceAll("[\\n\\r\\p{C}]", ""));
    }
    recordReader.close();
    SampleDataRecord sampleDataRecord = new SampleDataRecord(targetFilePath.toUri().getPath(), list);
    return sampleDataRecord;
}
Example 9: typical
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void typical() throws IOException {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<a:string>");
    ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    WriterOptions options = OrcFile.writerOptions(conf).inspector(inspector);
    Path path = new Path(temporaryFolder.getRoot().getCanonicalPath(), "part-00000");
    Writer writer = OrcFile.createWriter(path, options);
    writer.addRow(Arrays.asList("hello"));
    writer.close();
    try (OrcReader reader = new OrcReader(conf, path)) {
        List<Object> next = reader.next();
        assertThat(next.size(), is(1));
        assertThat(next.get(0), is((Object) "hello"));
        assertThat(reader.hasNext(), is(false));
    }
}
Example 10: typical
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void typical() throws IOException {
    Path path = new Path(temporaryFolder.getRoot().getCanonicalPath(), "part-00000");
    try (OrcWriter writer = new OrcWriter.Builder(conf, path).addField("a", TypeInfoFactory.stringTypeInfo).build()) {
        writer.addRow("hello");
    }
    ReaderOptions options = OrcFile.readerOptions(conf);
    Reader reader = OrcFile.createReader(path, options);
    RecordReader rows = reader.rows();
    @SuppressWarnings("unchecked")
    List<Object> next = (List<Object>) ObjectInspectorUtils.copyToStandardJavaObject(rows.next(null),
            reader.getObjectInspector());
    assertThat(next.size(), is(1));
    assertThat(next.get(0), is((Object) "hello"));
    assertThat(rows.hasNext(), is(false));
    rows.close();
}
Example 11: getShipFiles
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Override
public List<String> getShipFiles() {
    List<String> cacheFiles = new ArrayList<String>();
    String hadoopVersion = "20S";
    if (Utils.isHadoop23() || Utils.isHadoop2()) {
        hadoopVersion = "23";
    }
    Class hadoopVersionShimsClass;
    try {
        hadoopVersionShimsClass = Class.forName("org.apache.hadoop.hive.shims.Hadoop" +
                hadoopVersion + "Shims");
    } catch (ClassNotFoundException e) {
        throw new RuntimeException("Cannot find Hadoop" + hadoopVersion + "Shims class in classpath");
    }
    Class[] classList = new Class[] {OrcFile.class, HiveConf.class, AbstractSerDe.class,
            org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, hadoopVersionShimsClass,
            Input.class};
    return FuncUtils.getShipFiles(classList);
}
Example 12: testMultiStore
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
@Test
public void testMultiStore() throws Exception {
    pigServer.setBatchOn();
    pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
    pigServer.registerQuery("B = order A by a0;");
    pigServer.registerQuery("store B into '" + OUTPUT2 + "' using OrcStorage();");
    pigServer.registerQuery("store B into '" + OUTPUT3 + "' using OrcStorage('-c SNAPPY');");
    pigServer.executeBatch();
    Path outputFilePath = new Path(new Path(OUTPUT2), "part-r-00000");
    Reader reader = OrcFile.createReader(fs, outputFilePath);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.ZLIB);
    Path outputFilePath2 = new Path(new Path(OUTPUT3), "part-r-00000");
    reader = OrcFile.createReader(fs, outputFilePath2);
    assertEquals(reader.getNumberOfRows(), 2);
    assertEquals(reader.getCompression(), CompressionKind.SNAPPY);
    verifyData(outputFilePath, outputFilePath2, fs, 2);
}
Example 13: OrcStreamWriter
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public OrcStreamWriter( final Configuration config, final Path path, final String schema ) throws IOException {
    FileSystem fs = FileSystem.get(config);
    // Pull the ORC defaults (stripe size, compression, buffer size, row index stride) from HiveConf.
    long stripeSize = HiveConf.getLongVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE);
    CompressionKind compress = CompressionKind.valueOf(HiveConf.getVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS));
    int bufferSize = HiveConf.getIntVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE);
    int rowIndexStride = HiveConf.getIntVar(config, HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE);
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( schema );
    ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( typeInfo );
    writer = OrcFile.createWriter( fs, path, config, inspector, stripeSize, compress, bufferSize, rowIndexStride );
    formatter = OrcFormatterFactory.get( typeInfo );
}
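Example 13 uses the long positional OrcFile.createWriter overload. For comparison, the same settings can also be expressed through the OrcFile.WriterOptions builder; the sketch below is only an illustration, and the helper name createTunedWriter is hypothetical (it simply takes the same values Example 13 computes from HiveConf).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

// Hypothetical helper: the same configuration as Example 13, expressed through the
// WriterOptions builder instead of the positional createWriter overload.
public static Writer createTunedWriter( final Configuration config, final FileSystem fs, final Path path,
        final ObjectInspector inspector, final long stripeSize, final CompressionKind compress,
        final int bufferSize, final int rowIndexStride ) throws IOException {
    OrcFile.WriterOptions opts = OrcFile.writerOptions( config )
            .fileSystem( fs )                 // otherwise resolved from the path and configuration
            .inspector( inspector )
            .stripeSize( stripeSize )
            .compress( compress )             // CompressionKind, e.g. ZLIB or SNAPPY
            .bufferSize( bufferSize )
            .rowIndexStride( rowIndexStride );
    return OrcFile.createWriter( path, opts );
}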
Example 14: getOrcObjectInspector
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
public static ObjectInspector getOrcObjectInspector( final Configuration config , final Path path ) throws IOException {
    // Pick a random ORC file under the directory and return its ObjectInspector.
    Random rnd = new Random();
    FileSystem fs = FileSystem.get( config );
    FileStatus[] fsStatus = fs.listStatus( path );
    Path sampleOrcPath = fsStatus[rnd.nextInt( fsStatus.length )].getPath();
    Reader reader = OrcFile.createReader( sampleOrcPath , OrcFile.readerOptions( config ) );
    return reader.getObjectInspector();
}
Example 15: dumpOrcRecordsFromFile
import org.apache.hadoop.hive.ql.io.orc.OrcFile; // import the required package/class
private static void dumpOrcRecordsFromFile(File file) throws IOException {
    StringBuilder metaInfo = new StringBuilder(1024);
    metaInfo.append("File: " + file.getAbsolutePath());
    Reader reader = null;
    try {
        reader = OrcFile.createReader(new Path(file.toPath().toString()),
                OrcFile.readerOptions(new Configuration()));
    } catch (Exception e) {
        System.err.println("Exception caught for file: " + file);
        e.printStackTrace();
    }
    // Bail out if the file could not be opened as ORC; otherwise reader.rows() below would throw a NullPointerException.
    if (reader == null) {
        return;
    }
    StoreRecord record = null;
    int numRecords = 0;
    BlockKey firstBlkKey = null;
    BlockKey lastBlkKey = null;
    RecordReader rows = reader.rows();
    Object row = null;
    while (rows.hasNext()) {
        numRecords++;
        record = (StoreRecord) rows.next(row);
        if (dumpData) {
            StringBuilder dataInfo = new StringBuilder(1024000);
            dataInfo.append("\nRecord " + numRecords + ":" + record);
            System.out.println(dataInfo.toString());
        }
    }
    rows.close();
}