This article collects typical usage examples of the Java class org.apache.orc.OrcFile. If you have been wondering what the OrcFile class is for, how to use it, or what working code looks like, the curated examples below should help.
The OrcFile class belongs to the org.apache.orc package. Fifteen code examples are shown below, sorted by popularity by default.
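Before the individual examples, here is a minimal, self-contained sketch of the core OrcFile write path that most of the snippets below build on. The schema and output path are illustrative, not taken from any of the examples:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcWriteSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative two-column schema and output path.
        TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
        Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
            OrcFile.writerOptions(conf).setSchema(schema));
        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector id = (LongColumnVector) batch.cols[0];
        BytesColumnVector name = (BytesColumnVector) batch.cols[1];
        for (int r = 0; r < 10; r++) {
            int row = batch.size++;
            id.vector[row] = r;
            name.setVal(row, ("row-" + r).getBytes());
            if (batch.size == batch.getMaxSize()) { // flush full batches
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        if (batch.size != 0) { // flush the final partial batch
            writer.addRowBatch(batch);
        }
        writer.close();
    }
}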
Example 1: open
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcWriter<T> open(Path path) {
    if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(configuration);
    }
    if (compressionKind != null) {
        writerOptions.compress(compressionKind);
    }
    if (bufferSize != 0) {
        writerOptions.bufferSize(bufferSize);
    }
    // Add the schema to the writer options.
    TypeDescription schema = getTypeDescription();
    writerOptions.setSchema(schema);
    try {
        writer = OrcFile.createWriter(path, writerOptions);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    vectorizedRowBatch = schema.createRowBatch(batchSize);
    specialCaseSetup();
    return this;
}
Example 2: writePostScript
import org.apache.orc.OrcFile; // import the required package/class
private int writePostScript(int footerLength, int metadataLength) throws IOException {
    OrcProto.PostScript.Builder builder = OrcProto.PostScript.newBuilder()
        .setCompression(writeCompressionKind(compress)).setFooterLength(footerLength)
        .setMetadataLength(metadataLength).setMagic(OrcFile.MAGIC).addVersion(version.getMajor())
        .addVersion(version.getMinor()).setWriterVersion(OrcFile.CURRENT_WRITER.getId());
    if (compress != CompressionKind.NONE) {
        builder.setCompressionBlockSize(bufferSize);
    }
    OrcProto.PostScript ps = builder.build();
    // need to write this uncompressed
    long startPosn = rawWriter.getPos();
    ps.writeTo(rawWriter);
    long length = rawWriter.getPos() - startPosn;
    if (length > 255) {
        throw new IllegalArgumentException("PostScript too large at " + length);
    }
    return (int) length;
}
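The 255-byte limit is not arbitrary: the ORC format stores the serialized PostScript at the very end of the file, followed by a single byte holding its length, so a reader bootstraps itself by reading that last byte first. A minimal sketch of that bootstrap read, using an illustrative local path:

import java.io.IOException;
import java.io.RandomAccessFile;

public class PostScriptLengthSketch {
    public static void main(String[] args) throws IOException {
        // The last byte of an ORC file is the PostScript length, which is why
        // writePostScript() above must reject anything larger than 255 bytes.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/example.orc", "r")) {
            file.seek(file.length() - 1);
            System.out.println("PostScript length: " + file.read());
        }
    }
}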
Example 3: JsonORCFileWriter
import org.apache.orc.OrcFile; // import the required package/class
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> fieldTypes = schema.getChildren();
    converters = new JsonConverter[fieldTypes.size()];
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = VectorColumnFiller.createConverter(fieldTypes.get(c));
    }
    writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
        .compress(resolveCompression(codec)).setSchema(schema));
    batch = schema.createRowBatch();
}
Example 4: PentahoOrcRecordWriter
import org.apache.orc.OrcFile; // import the required package/class
public PentahoOrcRecordWriter( SchemaDescription schemaDescription, TypeDescription schema, String filePath,
                               Configuration conf ) {
  this.schemaDescription = schemaDescription;
  this.schema = schema;
  final AtomicInteger fieldNumber = new AtomicInteger(); // Mutable field count
  schemaDescription.forEach( field -> setOutputMeta( fieldNumber, field ) );
  outputRowMetaAndData = new RowMetaAndData( outputRowMeta, new Object[ fieldNumber.get() ] );
  try {
    writer = OrcFile.createWriter( new Path( filePath ),
      OrcFile.writerOptions( conf ).setSchema( schema ) );
    batch = schema.createRowBatch();
  } catch ( IOException e ) {
    logger.error( e );
  }
  // Write the additional metadata for the fields
  new OrcMetaDataWriter( writer ).write( schemaDescription );
}
Example 5: test
import org.apache.orc.OrcFile; // import the required package/class
@Test
public void test() throws IOException, Descriptors.DescriptorValidationException {
    Configuration conf = new Configuration();
    System.setProperty("hadoop.home.dir", "/");
    FileSystem fileSystem = FileSystem.get(URI.create("hdfs://presto00:9000"), conf);
    Path hdfsDirPath = new Path("/rainbow2/orc_new_compress");
    System.out.println(fileSystem.isFile(hdfsDirPath));
    FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
    System.out.println(fileStatuses.length);
    for (FileStatus status : fileStatuses) {
        System.out.println(status.getPath() + ", " + status.getLen());
    }
    Reader reader = OrcFile.createReader(fileStatuses[0].getPath(),
        OrcFile.readerOptions(conf));
    System.out.println("file length:" + reader.getFileTail().getFileLength());
    List<String> columnNames = new ArrayList<>();
    columnNames.add("samplepercent");
    System.out.println(reader.getRawDataSizeOfColumns(columnNames));
    System.out.println(reader.getFileTail().getFooter().getTypes(0).getFieldNames(0));
    System.out.println(reader.getTypes().get(0).getSerializedSize());
    List<Reader> readers = new ArrayList<>();
    for (FileStatus fileStatus : fileStatuses) {
        Reader reader1 = OrcFile.createReader(fileStatus.getPath(),
            OrcFile.readerOptions(conf));
        readers.add(reader1);
        System.out.println("content size: " + reader1.getContentLength() + ", raw size: "
            + reader1.getRawDataSize());
    }
    for (String columnName : reader.getSchema().getFieldNames()) {
        System.out.println(columnName);
    }
}
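Example 5 only inspects file metadata; scanning actual rows goes through a RecordReader that fills a VectorizedRowBatch. A minimal sketch, with an illustrative file path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class OrcScanSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
            OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        long total = 0;
        while (rows.nextBatch(batch)) { // returns false once the file is exhausted
            total += batch.size;
        }
        rows.close();
        System.out.println("rows: " + total);
    }
}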
Example 6: createStream
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Create a stream to store part of a column.
 *
 * @param column the column id for the stream
 * @param kind the kind of stream
 * @return the output stream that the section needs to be written to
 * @throws IOException
 */
public OutStream createStream(int column, OrcProto.Stream.Kind kind) throws IOException {
    final StreamName name = new StreamName(column, kind);
    final EnumSet<CompressionCodec.Modifier> modifiers;
    switch (kind) {
        case BLOOM_FILTER:
        case DATA:
        case DICTIONARY_DATA:
            if (getCompressionStrategy() == OrcFile.CompressionStrategy.SPEED) {
                modifiers = EnumSet.of(CompressionCodec.Modifier.FAST, CompressionCodec.Modifier.TEXT);
            } else {
                modifiers = EnumSet.of(CompressionCodec.Modifier.DEFAULT, CompressionCodec.Modifier.TEXT);
            }
            break;
        case LENGTH:
        case DICTIONARY_COUNT:
        case PRESENT:
        case ROW_INDEX:
        case SECONDARY:
            // easily compressed using the fastest modes
            modifiers = EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY);
            break;
        default:
            LOG.warn("Missing ORC compression modifiers for " + kind);
            modifiers = null;
            break;
    }
    BufferedStream result = streams.get(name);
    if (result == null) {
        // Pass the codec through unchanged when compression is disabled (codec == null).
        result = new BufferedStream(name.toString(), bufferSize,
            codec == null ? codec : codec.modify(modifiers));
        streams.put(name, result);
    }
    return result.outStream;
}
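The strategy consulted by getCompressionStrategy() is normally driven by the orc.compression.strategy setting. A hedged sketch, assuming your ORC version exposes it through the OrcConf.COMPRESSION_STRATEGY enum constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

Configuration conf = new Configuration();
// "SPEED" picks faster codec modifiers; "COMPRESSION" favors smaller output.
OrcConf.COMPRESSION_STRATEGY.setString(conf, "SPEED");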
Example 7: createIntegerWriter
import org.apache.orc.OrcFile; // import the required package/class
IntegerWriter createIntegerWriter(PositionedOutputStream output, boolean signed,
                                  boolean isDirectV2, StreamFactory writer) {
    if (isDirectV2) {
        boolean alignedBitpacking = false;
        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
            alignedBitpacking = true;
        }
        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
    } else {
        return new RunLengthIntegerWriter(output, signed);
    }
}
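For context (general RLE v2 behavior rather than anything stated in this snippet): aligned bitpacking rounds bit widths up to byte-friendly sizes, which decodes faster at the cost of slightly larger output, so it is only switched on under OrcFile.EncodingStrategy.SPEED.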
Example 8: getStream
import org.apache.orc.OrcFile; // import the required package/class
@VisibleForTesting
public ADataOutputStream getStream() throws IOException {
    if (rawWriter == null) {
        // final OutputStream os = new FileOutputStream("/tmp/abc.orc");
        rawWriter = new ADataOutputStream(null);
        // rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
        //     fs.getDefaultReplication(path), blockSize);
        rawWriter.writeBytes(OrcFile.MAGIC);
        headerLength = rawWriter.getPos();
        writer = new OutStream("metadata", bufferSize, codec, new DirectStream(rawWriter));
        protobufWriter = CodedOutputStream.newInstance(writer);
    }
    return rawWriter;
}
Example 9: initialize
import org.apache.orc.OrcFile; // import the required package/class
@Override
public void initialize(Map<String, Object> metaData) {
    try {
        Configuration conf = new Configuration();
        // conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");
        processor = new OrcEntityProcessor(OrcFile.createWriter(new Path(filename),
            OrcFile.writerOptions(conf).setSchema(SCHEMA)), SCHEMA.createRowBatch());
    } catch (IOException e) {
        throw new OsmosisRuntimeException(e);
    }
}
Example 10: JsonORCFileReader
import org.apache.orc.OrcFile; // import the required package/class
@SuppressWarnings("deprecation")
public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    Path path = new Path(logFilePath.getLogFilePath());
    Reader reader = OrcFile.createReader(path,
        OrcFile.readerOptions(new Configuration(true)));
    offset = logFilePath.getOffset();
    rows = reader.rows();
    batch = reader.getSchema().createRowBatch();
    rows.nextBatch(batch);
}
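The constructor pre-reads the first batch; if a consumer instead needed to resume at an absolute row position, org.apache.orc.RecordReader offers seekToRow. A hedged sketch reusing the rows and batch fields above (the helper is hypothetical, and note that the offset field in this example tracks log offsets, not row numbers):

// Hypothetical helper, assuming the fields initialized in the constructor above.
private void skipToRow(long rowPosition) throws IOException {
    rows.seekToRow(rowPosition); // reposition the reader at an absolute row
    rows.nextBatch(batch);       // refill the batch from the new position
}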
Example 11: getReader
import org.apache.orc.OrcFile; // import the required package/class
private Reader getReader() throws Exception {
  return inClassloader( () -> {
    checkNullFileName();
    Path filePath;
    FileSystem fs;
    Reader orcReader;
    try {
      filePath = new Path( fileName );
      fs = FileSystem.get( filePath.toUri(), conf );
      if ( !fs.exists( filePath ) ) {
        throw new NoSuchFileException( fileName );
      }
      if ( fs.getFileStatus( filePath ).isDirectory() ) {
        PathFilter pathFilter = new PathFilter() {
          public boolean accept( Path file ) {
            return file.getName().endsWith( ".orc" );
          }
        };
        FileStatus[] fileStatuses = fs.listStatus( filePath, pathFilter );
        if ( fileStatuses.length == 0 ) {
          throw new NoSuchFileException( fileName );
        }
        filePath = fileStatuses[0].getPath();
      }
      orcReader = OrcFile.createReader( filePath,
        OrcFile.readerOptions( conf ).filesystem( fs ) );
    } catch ( IOException e ) {
      throw new RuntimeException( "Unable to read data from file " + fileName, e );
    }
    return orcReader;
  } );
}
Example 12: withOptions
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcHandle<T> withOptions(OrcFile.WriterOptions writerOptions) {
    this.writerOptions = writerOptions;
    return this;
}
Example 13: flush
import org.apache.orc.OrcFile; // import the required package/class
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
{
    Configuration conf = new Configuration();
    try {
        Writer writer = OrcFile.createWriter(new Path(path),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .stripeSize(orcFileStripeSize)
                .bufferSize(orcFileBufferSize)
                .blockSize(orcFileBlockSize)
                .compress(CompressionKind.ZLIB)
                .version(OrcFile.Version.V_0_12));
        VectorizedRowBatch batch = schema.createRowBatch();
        while (segment.hasNext()) {
            String[] contents = segment.getNext();
            int rowCount = batch.size++;
            // Every column is written as bytes; this assumes an all-string schema.
            for (int i = 0; i < contents.length; i++) {
                ((BytesColumnVector) batch.cols[i]).setVal(rowCount, contents[i].getBytes());
            }
            // Flush only once the batch is full, after the whole row has been filled in.
            if (batch.size == batch.getMaxSize()) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        // Write any remaining rows, then close the file exactly once.
        if (batch.size != 0) {
            writer.addRowBatch(batch);
            batch.reset();
        }
        writer.close();
        segment.setFilePath(path);
        System.out.println("path : " + path);
        return true;
    }
    catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
Example 14: AWriterImpl
import org.apache.orc.OrcFile; // import the required package/class
public AWriterImpl(FileSystem fs, Path path, OrcFile.WriterOptions opts) throws IOException {
    this.fs = fs;
    this.path = path;
    this.conf = opts.getConfiguration();
    this.callback = opts.getCallback();
    this.schema = opts.getSchema();
    if (callback != null) {
        callbackContext = new OrcFile.WriterContext() {
            @Override
            public Writer getWriter() {
                return AWriterImpl.this;
            }
        };
    } else {
        callbackContext = null;
    }
    this.adjustedStripeSize = opts.getStripeSize();
    this.defaultStripeSize = opts.getStripeSize();
    this.version = opts.getVersion();
    this.encodingStrategy = opts.getEncodingStrategy();
    this.compressionStrategy = opts.getCompressionStrategy();
    this.addBlockPadding = opts.getBlockPadding();
    this.blockSize = opts.getBlockSize();
    this.paddingTolerance = opts.getPaddingTolerance();
    this.compress = opts.getCompress();
    this.rowIndexStride = opts.getRowIndexStride();
    this.memoryManager = opts.getMemoryManager();
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress);
    int numColumns = schema.getMaximumId() + 1;
    if (opts.isEnforceBufferSize()) {
        this.bufferSize = opts.getBufferSize();
    } else {
        this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
    }
    if (version == OrcFile.Version.V_0_11) {
        /* do not write bloom filters for ORC v11 */
        this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
    } else {
        this.bloomFilterColumns = OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
    }
    this.bloomFilterFpp = opts.getBloomFilterFpp();
    treeWriter = createTreeWriter(schema, streamFactory, false);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
        throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE);
    }
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, opts.getStripeSize(), this);
    // LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
    //     " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
    //     compress, bufferSize);
}
Example 15: getVersion
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Get the version of the file to write.
 */
public OrcFile.Version getVersion() {
    return version;
}