

Java VectorizedRowBatch Class Code Examples

This article collects and organizes typical usage examples of the Java class org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch. If you have been asking yourself what VectorizedRowBatch is for, how to use it, or where to find working examples, the curated class examples here should help.


The VectorizedRowBatch class belongs to the org.apache.hadoop.hive.ql.exec.vector package. Fifteen code examples of the class are shown below, ordered by popularity.
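For readers who want to see the class end to end first, here is a minimal, self-contained sketch (written against the standalone ORC core API; the schema and output path are invented for illustration) that creates a batch from a schema, fills it row by row, and flushes whenever it fills up:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class BatchWriteSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical schema and output path, for illustration only.
    TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
    Writer writer = OrcFile.createWriter(new Path("/tmp/example.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch();
    LongColumnVector id = (LongColumnVector) batch.cols[0];
    BytesColumnVector name = (BytesColumnVector) batch.cols[1];
    for (int i = 0; i < 10000; i++) {
      int row = batch.size++;                 // claim the next row slot
      id.vector[row] = i;
      name.setVal(row, ("row-" + i).getBytes());
      if (batch.size == batch.getMaxSize()) { // batch full: flush and reuse
        writer.addRowBatch(batch);
        batch.reset();
      }
    }
    if (batch.size != 0) {                    // flush the final partial batch
      writer.addRowBatch(batch);
    }
    writer.close();
  }
}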

Example 1: addRowBatch

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  if (buildIndex) {
    // Batch the writes up to the rowIndexStride so that we can get the
    // right size indexes.
    int posn = 0;
    while (posn < batch.size) {
      int chunkSize = Math.min(batch.size - posn, rowIndexStride - rowsInIndex);
      treeWriter.writeRootBatch(batch, posn, chunkSize);
      posn += chunkSize;
      rowsInIndex += chunkSize;
      rowsInStripe += chunkSize;
      if (rowsInIndex >= rowIndexStride) {
        createRowIndexEntry();
      }
    }
  } else {
    rowsInStripe += batch.size;
    treeWriter.writeRootBatch(batch, 0, batch.size);
  }
  memoryManager.addedRow(batch.size);
}
 
Developer: ampool, Project: monarch, Lines: 23, Source: AWriterImpl.java
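Example 1 is the writer's side of the contract; for context, here is a hedged sketch of the reading side, which hands the same VectorizedRowBatch back out (standalone ORC core API, a hypothetical input path, and it assumes the first column is a bigint, as in the sketch above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class BatchReadSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(new Configuration()));
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows();
    // nextBatch() refills the batch and returns false at end of file
    while (rows.nextBatch(batch)) {
      LongColumnVector id = (LongColumnVector) batch.cols[0];
      for (int r = 0; r < batch.size; r++) {
        // with isRepeating set, every row shares the value at index 0
        long value = id.isRepeating ? id.vector[0] : id.vector[r];
        System.out.println(value);
      }
    }
    rows.close();
  }
}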

Example 2: convertFromOrc

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@VisibleForTesting
RowMetaAndData convertFromOrc( RowMetaAndData rowMetaAndData, VectorizedRowBatch batch, int currentBatchRow,
                                      SchemaDescription schemaDescription, TypeDescription typeDescription,
                                      Map<String, Integer> schemaToOrcSubcripts,
                                      SchemaDescription orcSchemaDescription ) {

  int orcColumn;
  for ( SchemaDescription.Field field : schemaDescription ) {
    SchemaDescription.Field orcField = orcSchemaDescription.getField( field.formatFieldName );
    if ( orcField != null ) { // skip fields missing from the ORC schema (the original checked field, which cannot be null here)
      ColumnVector columnVector = batch.cols[ schemaToOrcSubcripts.get( field.pentahoFieldName ) ];
      Object orcToPentahoValue = convertFromSourceToTargetDataType( columnVector, currentBatchRow, orcField.pentahoValueMetaType );

      Object convertToSchemaValue = null;
      try {
        convertToSchemaValue = valueMetaConverter.convertFromSourceToTargetDataType( orcField.pentahoValueMetaType, field.pentahoValueMetaType, orcToPentahoValue );
      } catch ( ValueMetaConversionException e ) {
        logger.error( e );
      }
      rowMetaAndData.addValue( field.pentahoFieldName, field.pentahoValueMetaType, convertToSchemaValue );
    }
  }

  return rowMetaAndData;
}
 
Developer: pentaho, Project: pentaho-hadoop-shims, Lines: 26, Source: OrcConverter.java

Example 3: next

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public boolean next( final NullWritable key, final VectorizedRowBatch outputBatch ) throws IOException {
  outputBatch.reset();
  setting.setPartitionValues( outputBatch );

  if( indexSize <= currentIndex ){
    if( ! currentReader.hasNext() ){
      updateCounter( currentReader.getReadStats() );
      outputBatch.endOfFile = true;
      isEnd = true;
      return false;
    }
    while( ! setSpread() ){
      if( ! currentReader.hasNext() ){
        updateCounter( currentReader.getReadStats() );
        outputBatch.endOfFile = true;
        isEnd = true;
        return false;
      }
    }
  }
  int maxSize = outputBatch.getMaxSize();
  if( indexSize < currentIndex + maxSize ){
    maxSize = indexSize - currentIndex;
  }

  for( int colIndex : needColumnIds ){
    assignors[colIndex].setColumnVector( outputBatch.cols[colIndex] , currentIndexList , currentIndex , maxSize );
  }
  outputBatch.size = maxSize;

  currentIndex += maxSize;
  if( indexSize <= currentIndex && ! currentReader.hasNext() ){
    outputBatch.endOfFile = true;
  }

  return outputBatch.size > 0;
}
 
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 39, Source: MDSHiveDirectVectorizedReader.java

Example 4: writeRootBatch

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
void writeRootBatch(VectorizedRowBatch batch, int offset, int length) throws IOException {
  // update the statistics for the root column
  indexStatistics.increment(length);
  // I'm assuming that the root column isn't nullable so that I don't need
  // to update isPresent.
  for (int i = 0; i < childrenWriters.length; ++i) {
    childrenWriters[i].writeBatch(batch.cols[i], offset, length);
  }
}
 
Developer: ampool, Project: monarch, Lines: 11, Source: AWriterImpl.java

Example 5: fillRows

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
/**
 * Fills an ORC batch into an array of Row.
 *
 * @param rows The array of rows to be filled.
 * @param schema The schema of the ORC data.
 * @param batch The ORC data.
 * @param selectedFields The list of selected ORC fields.
 * @return The number of rows that were filled.
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {

	int rowsToRead = Math.min((int) batch.count(), rows.length);

	List<TypeDescription> fieldTypes = schema.getChildren();
	// read each selected field
	for (int rowIdx = 0; rowIdx < selectedFields.length; rowIdx++) {
		int orcIdx = selectedFields[rowIdx];
		readField(rows, rowIdx, fieldTypes.get(orcIdx), batch.cols[orcIdx], null, rowsToRead);
	}
	return rowsToRead;
}
 
Developer: axbaretto, Project: flink, Lines: 22, Source: OrcUtils.java

Example 6: processRow

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
        TypeDescription schema, int row) throws JSONException {
    if (schema.getCategory() == TypeDescription.Category.STRUCT) {
        List<TypeDescription> fieldTypes = schema.getChildren();
        List<String> fieldNames = schema.getFieldNames();
        writer.object();
        for (int c = 0; c < batch.cols.length; ++c) {
            writer.key(fieldNames.get(c));
            setValue(writer, batch.cols[c], fieldTypes.get(c), row);
        }
        writer.endObject();
    } else {
        setValue(writer, batch.cols[0], schema, row);
    }
}
 
Developer: pinterest, Project: secor, Lines: 16, Source: JsonFieldFiller.java
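A hedged sketch of how a helper like processRow above might be driven from a reader loop; the input path is hypothetical and the driver class is invented here. It assumes org.json's JSONWriter wrapped around a StringWriter, consistent with the method's signature:

import java.io.StringWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.json.JSONWriter;

public class JsonDumpSketch {
  public static void main(String[] args) throws Exception {
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(new Configuration()));
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    RecordReader rows = reader.rows();
    while (rows.nextBatch(batch)) {
      // emit one JSON object per row in the batch
      for (int r = 0; r < batch.size; r++) {
        StringWriter out = new StringWriter();
        JsonFieldFiller.processRow(new JSONWriter(out), batch, reader.getSchema(), r);
        System.out.println(out);
      }
    }
    rows.close();
  }
}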

Example 7: fillRow

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
public static void fillRow(int rowIndex, JsonConverter[] converters,
        TypeDescription schema, VectorizedRowBatch batch, JsonObject data) {
    List<String> fieldNames = schema.getFieldNames();
    for (int c = 0; c < converters.length; ++c) {
        JsonElement field = data.get(fieldNames.get(c));
        if (field == null) {
            batch.cols[c].noNulls = false;
            batch.cols[c].isNull[rowIndex] = true;
        } else {
            converters[c].convert(field, batch.cols[c], rowIndex);
        }
    }
}
 
Developer: pinterest, Project: secor, Lines: 14, Source: VectorColumnFiller.java
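And a sketch of the opposite direction: parsing JSON records with Gson and filling a batch via fillRow before handing it to an ORC writer. Note the assumptions: only fillRow appears above, so the JsonConverter type and the VectorColumnFiller.createConverter factory are guesses about the surrounding class, and the schema is invented. The second record deliberately omits a field to exercise fillRow's null path:

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class JsonToOrcSketch {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
    // Assumed factory: one JsonConverter per top-level field of the schema.
    JsonConverter[] converters = new JsonConverter[schema.getChildren().size()];
    for (int c = 0; c < converters.length; c++) {
      converters[c] = VectorColumnFiller.createConverter(schema.getChildren().get(c));
    }
    Writer writer = OrcFile.createWriter(new Path("/tmp/from-json.orc"),
        OrcFile.writerOptions(new Configuration()).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch();
    List<String> jsonLines = Arrays.asList("{\"id\":1,\"name\":\"a\"}", "{\"id\":2}");
    for (String line : jsonLines) {
      JsonObject data = JsonParser.parseString(line).getAsJsonObject(); // Gson 2.8.6+
      VectorColumnFiller.fillRow(batch.size++, converters, schema, batch, data);
      if (batch.size == batch.getMaxSize()) {
        writer.addRowBatch(batch);
        batch.reset();
      }
    }
    if (batch.size != 0) {
      writer.addRowBatch(batch);
    }
    writer.close();
  }
}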

Example 8: createValue

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public VectorizedRowBatch createValue() {
  return setting.createVectorizedRowBatch();
}
 
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 5, Source: MDSHiveDirectVectorizedReader.java

Example 9: createVectorizedRowBatch

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public VectorizedRowBatch createVectorizedRowBatch(){
  return rbCtx.createVectorizedRowBatch( projectionColumn );
}
 
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 5, Source: HiveVectorizedReaderSetting.java

Example 10: setPartitionValues

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void setPartitionValues( final VectorizedRowBatch outputBatch ){
  if( 0 < partitionValues.length ){
    rbCtx.addPartitionColsToBatch( outputBatch , partitionValues );
  }
}
 
Developer: yahoojapan, Project: multiple-dimension-spread, Lines: 7, Source: HiveVectorizedReaderSetting.java

Example 11: flush

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
    {
        Configuration conf = new Configuration();
        try {
            Writer writer = OrcFile.createWriter(new Path(path),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
                            .stripeSize(orcFileStripeSize)
                            .bufferSize(orcFileBufferSize)
                            .blockSize(orcFileBlockSize)
                            .compress(CompressionKind.ZLIB)
                            .version(OrcFile.Version.V_0_12));
            VectorizedRowBatch batch = schema.createRowBatch();
            while (segment.hasNext()) {
                String[] contents = segment.getNext();
                int rowCount = batch.size++;
                for (int i = 0; i < contents.length; i++) {
                    ((BytesColumnVector) batch.cols[i]).setVal(rowCount, contents[i].getBytes());
                }
                // flush once the batch is full, after the whole row has been written
                // (the original checked inside the column loop, which could split a row across batches)
                if (batch.size == batch.getMaxSize()) {
                    writer.addRowBatch(batch);
                    batch.reset();
                }
            }
            // flush any remaining rows, then close the writer exactly once
            // (the original flushed and closed inside the loop, so a second iteration wrote to a closed writer)
            if (batch.size != 0) {
                writer.addRowBatch(batch);
            }
            writer.close();
            segment.setFilePath(path);
            return true;
        }
        catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }
 
Developer: dbiir, Project: paraflow, Lines: 42, Source: OrcFlushThread.java

Example 12: addRowBatch

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  flushInternalBatch();
  super.addRowBatch(batch);
}
 
Developer: ampool, Project: monarch, Lines: 6, Source: AWriter.java

Example 13: serializeVector

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector) throws SerDeException {
	 throw new UnsupportedOperationException("serializeVector not supported");
}
 
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 5, Source: EthereumBlockSerde.java

Example 14: deserializeVector

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
	// nothing to do here
	
}
 
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 6, Source: EthereumBlockSerde.java

Example 15: deserializeVector

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; // import the required package/class
/** VectorizedSerde **/
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
	// nothing to do here
}
 
Developer: ZuInnoTe, Project: hadoopcryptoledger, Lines: 6, Source: BitcoinBlockSerde.java


Note: The org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their developers; copyright in the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.