This article collects typical usage examples of the Java class org.apache.orc.OrcFile. If you have been wondering what the OrcFile class is for, how to use it, or what working code looks like, the curated examples below should help.
The OrcFile class belongs to the org.apache.orc package. Fifteen code examples are shown below, sorted by popularity by default.
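Before the individual examples, here is a minimal, self-contained sketch of the core OrcFile write path that most of the snippets below build on. The schema and output path are illustrative, not taken from any of the examples:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class OrcWriteSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative two-column schema and output path.
        TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
        Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
            OrcFile.writerOptions(conf).setSchema(schema));
        VectorizedRowBatch batch = schema.createRowBatch();
        LongColumnVector id = (LongColumnVector) batch.cols[0];
        BytesColumnVector name = (BytesColumnVector) batch.cols[1];
        for (int r = 0; r < 10; r++) {
            int row = batch.size++;
            id.vector[row] = r;
            name.setVal(row, ("row-" + r).getBytes());
            if (batch.size == batch.getMaxSize()) { // flush full batches
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        if (batch.size != 0) { // flush the final partial batch
            writer.addRowBatch(batch);
        }
        writer.close();
    }
}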
Example 1: open
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcWriter<T> open(Path path) {
    if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(configuration);
    }
    if (compressionKind != null) {
        writerOptions.compress(compressionKind);
    }
    if (bufferSize != 0) {
        writerOptions.bufferSize(bufferSize);
    }
    // Add the schema to the writer options.
    TypeDescription schema = getTypeDescription();
    writerOptions.setSchema(schema);
    try {
        writer = OrcFile.createWriter(path, writerOptions);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    vectorizedRowBatch = schema.createRowBatch(batchSize);
    specialCaseSetup();
    return this;
}
Example 2: writePostScript
import org.apache.orc.OrcFile; // import the required package/class
private int writePostScript(int footerLength, int metadataLength) throws IOException {
    OrcProto.PostScript.Builder builder = OrcProto.PostScript.newBuilder()
        .setCompression(writeCompressionKind(compress)).setFooterLength(footerLength)
        .setMetadataLength(metadataLength).setMagic(OrcFile.MAGIC).addVersion(version.getMajor())
        .addVersion(version.getMinor()).setWriterVersion(OrcFile.CURRENT_WRITER.getId());
    if (compress != CompressionKind.NONE) {
        builder.setCompressionBlockSize(bufferSize);
    }
    OrcProto.PostScript ps = builder.build();
    // need to write this uncompressed
    long startPosn = rawWriter.getPos();
    ps.writeTo(rawWriter);
    long length = rawWriter.getPos() - startPosn;
    if (length > 255) {
        throw new IllegalArgumentException("PostScript too large at " + length);
    }
    return (int) length;
}
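The 255-byte limit is not arbitrary: the ORC format stores the serialized PostScript at the very end of the file, followed by a single byte holding its length, so a reader bootstraps itself by reading that last byte first. A minimal sketch of that bootstrap read, using an illustrative local path:

import java.io.IOException;
import java.io.RandomAccessFile;

public class PostScriptLengthSketch {
    public static void main(String[] args) throws IOException {
        // The last byte of an ORC file is the PostScript length, which is why
        // writePostScript() above must reject anything larger than 255 bytes.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/example.orc", "r")) {
            file.seek(file.length() - 1);
            System.out.println("PostScript length: " + file.read());
        }
    }
}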
Example 3: JsonORCFileWriter
import org.apache.orc.OrcFile; // import the required package/class
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> fieldTypes = schema.getChildren();
    converters = new JsonConverter[fieldTypes.size()];
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = VectorColumnFiller.createConverter(fieldTypes.get(c));
    }
    writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
        .compress(resolveCompression(codec)).setSchema(schema));
    batch = schema.createRowBatch();
}
Example 4: PentahoOrcRecordWriter
import org.apache.orc.OrcFile; // import the required package/class
public PentahoOrcRecordWriter( SchemaDescription schemaDescription, TypeDescription schema, String filePath,
                               Configuration conf ) {
  this.schemaDescription = schemaDescription;
  this.schema = schema;
  final AtomicInteger fieldNumber = new AtomicInteger(); // Mutable field count
  schemaDescription.forEach( field -> setOutputMeta( fieldNumber, field ) );
  outputRowMetaAndData = new RowMetaAndData( outputRowMeta, new Object[ fieldNumber.get() ] );
  try {
    writer = OrcFile.createWriter( new Path( filePath ),
      OrcFile.writerOptions( conf ).setSchema( schema ) );
    batch = schema.createRowBatch();
  } catch ( IOException e ) {
    logger.error( e );
  }
  // Write the additional metadata for the fields
  new OrcMetaDataWriter( writer ).write( schemaDescription );
}
Example 5: test
import org.apache.orc.OrcFile; // import the required package/class
@Test
public void test() throws IOException, Descriptors.DescriptorValidationException {
    Configuration conf = new Configuration();
    System.setProperty("hadoop.home.dir", "/");
    FileSystem fileSystem = FileSystem.get(URI.create("hdfs://presto00:9000"), conf);
    Path hdfsDirPath = new Path("/rainbow2/orc_new_compress");
    System.out.println(fileSystem.isFile(hdfsDirPath));
    FileStatus[] fileStatuses = fileSystem.listStatus(hdfsDirPath);
    System.out.println(fileStatuses.length);
    for (FileStatus status : fileStatuses) {
        System.out.println(status.getPath() + ", " + status.getLen());
    }
    Reader reader = OrcFile.createReader(fileStatuses[0].getPath(),
        OrcFile.readerOptions(conf));
    System.out.println("file length:" + reader.getFileTail().getFileLength());
    List<String> columnNames = new ArrayList<>();
    columnNames.add("samplepercent");
    System.out.println(reader.getRawDataSizeOfColumns(columnNames));
    System.out.println(reader.getFileTail().getFooter().getTypes(0).getFieldNames(0));
    System.out.println(reader.getTypes().get(0).getSerializedSize());
    List<Reader> readers = new ArrayList<>();
    for (FileStatus fileStatus : fileStatuses) {
        Reader reader1 = OrcFile.createReader(fileStatus.getPath(),
            OrcFile.readerOptions(conf));
        readers.add(reader1);
        System.out.println("content size: " + reader1.getContentLength() + ", raw size: "
            + reader1.getRawDataSize());
    }
    for (String columnName : reader.getSchema().getFieldNames()) {
        System.out.println(columnName);
    }
}
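Example 5 only inspects file metadata; scanning actual rows goes through a RecordReader that fills a VectorizedRowBatch. A minimal sketch, with an illustrative file path:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class OrcScanSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
            OrcFile.readerOptions(conf));
        RecordReader rows = reader.rows();
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        long total = 0;
        while (rows.nextBatch(batch)) { // returns false once the file is exhausted
            total += batch.size;
        }
        rows.close();
        System.out.println("rows: " + total);
    }
}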
Example 6: createStream
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Create a stream to store part of a column.
 *
 * @param column the column id for the stream
 * @param kind the kind of stream
 * @return the output stream that the section needs to be written to
 * @throws IOException
 */
public OutStream createStream(int column, OrcProto.Stream.Kind kind) throws IOException {
    final StreamName name = new StreamName(column, kind);
    final EnumSet<CompressionCodec.Modifier> modifiers;
    switch (kind) {
        case BLOOM_FILTER:
        case DATA:
        case DICTIONARY_DATA:
            if (getCompressionStrategy() == OrcFile.CompressionStrategy.SPEED) {
                modifiers = EnumSet.of(CompressionCodec.Modifier.FAST, CompressionCodec.Modifier.TEXT);
            } else {
                modifiers = EnumSet.of(CompressionCodec.Modifier.DEFAULT, CompressionCodec.Modifier.TEXT);
            }
            break;
        case LENGTH:
        case DICTIONARY_COUNT:
        case PRESENT:
        case ROW_INDEX:
        case SECONDARY:
            // easily compressed using the fastest modes
            modifiers = EnumSet.of(CompressionCodec.Modifier.FASTEST, CompressionCodec.Modifier.BINARY);
            break;
        default:
            LOG.warn("Missing ORC compression modifiers for " + kind);
            modifiers = null;
            break;
    }
    BufferedStream result = streams.get(name);
    if (result == null) {
        // Pass the codec through unchanged when compression is disabled (codec == null).
        result = new BufferedStream(name.toString(), bufferSize,
            codec == null ? codec : codec.modify(modifiers));
        streams.put(name, result);
    }
    return result.outStream;
}
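The strategy consulted by getCompressionStrategy() is normally driven by the orc.compression.strategy setting. A hedged sketch, assuming your ORC version exposes it through the OrcConf.COMPRESSION_STRATEGY enum constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

Configuration conf = new Configuration();
// "SPEED" picks faster codec modifiers; "COMPRESSION" favors smaller output.
OrcConf.COMPRESSION_STRATEGY.setString(conf, "SPEED");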
Example 7: createIntegerWriter
import org.apache.orc.OrcFile; // import the required package/class
IntegerWriter createIntegerWriter(PositionedOutputStream output, boolean signed,
                                  boolean isDirectV2, StreamFactory writer) {
    if (isDirectV2) {
        boolean alignedBitpacking = false;
        if (writer.getEncodingStrategy().equals(OrcFile.EncodingStrategy.SPEED)) {
            alignedBitpacking = true;
        }
        return new RunLengthIntegerWriterV2(output, signed, alignedBitpacking);
    } else {
        return new RunLengthIntegerWriter(output, signed);
    }
}
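For context (general RLE v2 behavior rather than anything stated in this snippet): aligned bitpacking rounds bit widths up to byte-friendly sizes, which decodes faster at the cost of slightly larger output, so it is only switched on under OrcFile.EncodingStrategy.SPEED.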
Example 8: getStream
import org.apache.orc.OrcFile; // import the required package/class
@VisibleForTesting
public ADataOutputStream getStream() throws IOException {
    if (rawWriter == null) {
        // final OutputStream os = new FileOutputStream("/tmp/abc.orc");
        rawWriter = new ADataOutputStream(null);
        // rawWriter = fs.create(path, false, HDFS_BUFFER_SIZE,
        //     fs.getDefaultReplication(path), blockSize);
        rawWriter.writeBytes(OrcFile.MAGIC);
        headerLength = rawWriter.getPos();
        writer = new OutStream("metadata", bufferSize, codec, new DirectStream(rawWriter));
        protobufWriter = CodedOutputStream.newInstance(writer);
    }
    return rawWriter;
}
Example 9: initialize
import org.apache.orc.OrcFile; // import the required package/class
@Override
public void initialize(Map<String, Object> metaData) {
    try {
        Configuration conf = new Configuration();
        // conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");
        processor = new OrcEntityProcessor(OrcFile.createWriter(new Path(filename),
            OrcFile.writerOptions(conf).setSchema(SCHEMA)), SCHEMA.createRowBatch());
    } catch (IOException e) {
        throw new OsmosisRuntimeException(e);
    }
}
Example 10: JsonORCFileReader
import org.apache.orc.OrcFile; // import the required package/class
@SuppressWarnings("deprecation")
public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    Path path = new Path(logFilePath.getLogFilePath());
    Reader reader = OrcFile.createReader(path,
        OrcFile.readerOptions(new Configuration(true)));
    offset = logFilePath.getOffset();
    rows = reader.rows();
    batch = reader.getSchema().createRowBatch();
    rows.nextBatch(batch);
}
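The constructor pre-reads the first batch; if a consumer instead needed to resume at an absolute row position, org.apache.orc.RecordReader offers seekToRow. A hedged sketch reusing the rows and batch fields above (the helper is hypothetical, and note that the offset field in this example tracks log offsets, not row numbers):

// Hypothetical helper, assuming the fields initialized in the constructor above.
private void skipToRow(long rowPosition) throws IOException {
    rows.seekToRow(rowPosition); // reposition the reader at an absolute row
    rows.nextBatch(batch);       // refill the batch from the new position
}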
Example 11: getReader
import org.apache.orc.OrcFile; // import the required package/class
private Reader getReader() throws Exception {
  return inClassloader( () -> {
    checkNullFileName();
    Path filePath;
    FileSystem fs;
    Reader orcReader;
    try {
      filePath = new Path( fileName );
      fs = FileSystem.get( filePath.toUri(), conf );
      if ( !fs.exists( filePath ) ) {
        throw new NoSuchFileException( fileName );
      }
      if ( fs.getFileStatus( filePath ).isDirectory() ) {
        PathFilter pathFilter = new PathFilter() {
          public boolean accept( Path file ) {
            return file.getName().endsWith( ".orc" );
          }
        };
        FileStatus[] fileStatuses = fs.listStatus( filePath, pathFilter );
        if ( fileStatuses.length == 0 ) {
          throw new NoSuchFileException( fileName );
        }
        filePath = fileStatuses[0].getPath();
      }
      orcReader = OrcFile.createReader( filePath,
        OrcFile.readerOptions( conf ).filesystem( fs ) );
    } catch ( IOException e ) {
      throw new RuntimeException( "Unable to read data from file " + fileName, e );
    }
    return orcReader;
  } );
}
Example 12: withOptions
import org.apache.orc.OrcFile; // import the required package/class
@Override
public OrcHandle<T> withOptions(OrcFile.WriterOptions writerOptions) {
    this.writerOptions = writerOptions;
    return this;
}
Example 13: flush
import org.apache.orc.OrcFile; // import the required package/class
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
{
    Configuration conf = new Configuration();
    try {
        Writer writer = OrcFile.createWriter(new Path(path),
            OrcFile.writerOptions(conf)
                .setSchema(schema)
                .stripeSize(orcFileStripeSize)
                .bufferSize(orcFileBufferSize)
                .blockSize(orcFileBlockSize)
                .compress(CompressionKind.ZLIB)
                .version(OrcFile.Version.V_0_12));
        VectorizedRowBatch batch = schema.createRowBatch();
        while (segment.hasNext()) {
            String[] contents = segment.getNext();
            int rowCount = batch.size++;
            // Every column is written as bytes; this assumes an all-string schema.
            for (int i = 0; i < contents.length; i++) {
                ((BytesColumnVector) batch.cols[i]).setVal(rowCount, contents[i].getBytes());
            }
            // Flush only once the batch is full, after the whole row has been filled in.
            if (batch.size == batch.getMaxSize()) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        // Write any remaining rows, then close the file exactly once.
        if (batch.size != 0) {
            writer.addRowBatch(batch);
            batch.reset();
        }
        writer.close();
        segment.setFilePath(path);
        System.out.println("path : " + path);
        return true;
    }
    catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
Example 14: AWriterImpl
import org.apache.orc.OrcFile; // import the required package/class
public AWriterImpl(FileSystem fs, Path path, OrcFile.WriterOptions opts) throws IOException {
    this.fs = fs;
    this.path = path;
    this.conf = opts.getConfiguration();
    this.callback = opts.getCallback();
    this.schema = opts.getSchema();
    if (callback != null) {
        callbackContext = new OrcFile.WriterContext() {
            @Override
            public Writer getWriter() {
                return AWriterImpl.this;
            }
        };
    } else {
        callbackContext = null;
    }
    this.adjustedStripeSize = opts.getStripeSize();
    this.defaultStripeSize = opts.getStripeSize();
    this.version = opts.getVersion();
    this.encodingStrategy = opts.getEncodingStrategy();
    this.compressionStrategy = opts.getCompressionStrategy();
    this.addBlockPadding = opts.getBlockPadding();
    this.blockSize = opts.getBlockSize();
    this.paddingTolerance = opts.getPaddingTolerance();
    this.compress = opts.getCompress();
    this.rowIndexStride = opts.getRowIndexStride();
    this.memoryManager = opts.getMemoryManager();
    buildIndex = rowIndexStride > 0;
    codec = createCodec(compress);
    int numColumns = schema.getMaximumId() + 1;
    if (opts.isEnforceBufferSize()) {
        this.bufferSize = opts.getBufferSize();
    } else {
        this.bufferSize = getEstimatedBufferSize(defaultStripeSize, numColumns, opts.getBufferSize());
    }
    if (version == OrcFile.Version.V_0_11) {
        /* do not write bloom filters for ORC v11 */
        this.bloomFilterColumns = new boolean[schema.getMaximumId() + 1];
    } else {
        this.bloomFilterColumns = OrcUtils.includeColumns(opts.getBloomFilterColumns(), schema);
    }
    this.bloomFilterFpp = opts.getBloomFilterFpp();
    treeWriter = createTreeWriter(schema, streamFactory, false);
    if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
        throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE);
    }
    // ensure that we are able to handle callbacks before we register ourselves
    memoryManager.addWriter(path, opts.getStripeSize(), this);
    // LOG.info("ORC writer created for path: {} with stripeSize: {} blockSize: {}" +
    //     " compression: {} bufferSize: {}", path, defaultStripeSize, blockSize,
    //     compress, bufferSize);
}
Example 15: getVersion
import org.apache.orc.OrcFile; // import the required package/class
/**
 * Get the version of the file to write.
 */
public OrcFile.Version getVersion() {
    return version;
}