本文整理汇总了Java中org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch类的典型用法代码示例。如果您正苦于以下问题：Java VectorizedRowBatch类的具体用法？Java VectorizedRowBatch怎么用？Java VectorizedRowBatch使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
VectorizedRowBatch类属于org.apache.hadoop.hive.ql.exec.vector包，在下文中一共展示了VectorizedRowBatch类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: addRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Writes every row of {@code batch} through {@code treeWriter} and updates the
 * stripe/index row counters.
 *
 * <p>When {@code buildIndex} is set, the batch is written in runs that never
 * extend past the current row-index stride; each time {@code rowsInIndex}
 * reaches {@code rowIndexStride}, {@code createRowIndexEntry()} is invoked.
 * Otherwise the whole batch is written in a single call.
 *
 * @param batch the rows to write; {@code batch.size} rows are consumed
 * @throws IOException if the underlying tree writer fails
 */
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
  if (!buildIndex) {
    // No index: count the rows and hand the batch over in one piece.
    rowsInStripe += batch.size;
    treeWriter.writeRootBatch(batch, 0, batch.size);
  } else {
    // Split the batch so that each run ends at or before the stride boundary.
    for (int start = 0; start < batch.size; ) {
      final int leftInBatch = batch.size - start;
      final int leftInStride = rowIndexStride - rowsInIndex;
      final int run = Math.min(leftInBatch, leftInStride);
      treeWriter.writeRootBatch(batch, start, run);
      start += run;
      rowsInIndex += run;
      rowsInStripe += run;
      if (rowsInIndex >= rowIndexStride) {
        createRowIndexEntry();
      }
    }
  }
  memoryManager.addedRow(batch.size);
}
示例2: convertFromOrc
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Copies one row of an ORC batch into {@code rowMetaAndData}, converting each
 * value to the type requested by {@code schemaDescription}.
 *
 * <p>For every requested field the matching ORC field is looked up in
 * {@code orcSchemaDescription} by {@code formatFieldName}. Requested fields
 * that have no matching ORC field are skipped. A value whose conversion throws
 * {@link ValueMetaConversionException} is logged and added as {@code null}.
 *
 * @param rowMetaAndData       the row to append values to; also the return value
 * @param batch                the ORC batch holding the column vectors
 * @param currentBatchRow      index of the row inside {@code batch}
 * @param schemaDescription    the fields requested by the caller
 * @param typeDescription      not used by this method
 * @param schemaToOrcSubcripts maps a field's {@code pentahoFieldName} to its
 *                             index in {@code batch.cols}
 * @param orcSchemaDescription the fields as described by the ORC file
 * @return {@code rowMetaAndData}, with one value added per matched field
 */
@VisibleForTesting
RowMetaAndData convertFromOrc( RowMetaAndData rowMetaAndData, VectorizedRowBatch batch, int currentBatchRow,
                               SchemaDescription schemaDescription, TypeDescription typeDescription,
                               Map<String, Integer> schemaToOrcSubcripts,
                               SchemaDescription orcSchemaDescription ) {
  for ( SchemaDescription.Field field : schemaDescription ) {
    SchemaDescription.Field orcField = orcSchemaDescription.getField( field.formatFieldName );
    // The lookup result is what may be missing; the loop variable itself has
    // already been dereferenced above, so checking it would be a dead test.
    if ( orcField == null ) {
      continue;
    }
    ColumnVector columnVector = batch.cols[ schemaToOrcSubcripts.get( field.pentahoFieldName ) ];
    Object orcToPentahoValue =
      convertFromSourceToTargetDataType( columnVector, currentBatchRow, orcField.pentahoValueMetaType );
    Object convertToSchemaValue = null;
    try {
      convertToSchemaValue = valueMetaConverter.convertFromSourceToTargetDataType(
        orcField.pentahoValueMetaType, field.pentahoValueMetaType, orcToPentahoValue );
    } catch ( ValueMetaConversionException e ) {
      // Best effort: keep the row and store null for the unconvertible value.
      logger.error( e );
    }
    rowMetaAndData.addValue( field.pentahoFieldName, field.pentahoValueMetaType, convertToSchemaValue );
  }
  return rowMetaAndData;
}
示例3: next
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Fills {@code outputBatch} with the next run of rows.
 *
 * <p>The batch is reset and partition values are applied first. If the
 * current index list is exhausted ({@code indexSize <= currentIndex}),
 * {@code setSpread()} is retried until it returns {@code true}; before each
 * attempt the reader is checked, and when it has nothing left the read stats
 * are recorded, the batch is flagged end-of-file and {@code false} is returned.
 *
 * @param key         unused
 * @param outputBatch the batch to populate
 * @return {@code true} when at least one row was placed in {@code outputBatch}
 * @throws IOException if reading fails
 */
@Override
public boolean next( final NullWritable key, final VectorizedRowBatch outputBatch ) throws IOException {
  outputBatch.reset();
  setting.setPartitionValues( outputBatch );

  if( currentIndex >= indexSize ){
    // Check the reader before every setSpread() attempt, including the first.
    do {
      if( ! currentReader.hasNext() ){
        updateCounter( currentReader.getReadStats() );
        outputBatch.endOfFile = true;
        isEnd = true;
        return false;
      }
    } while( ! setSpread() );
  }

  // Take a full batch unless fewer rows remain in the current index list.
  final int capacity = outputBatch.getMaxSize();
  final int rowCount = ( indexSize < currentIndex + capacity ) ? indexSize - currentIndex : capacity;

  for( int columnId : needColumnIds ){
    assignors[columnId].setColumnVector( outputBatch.cols[columnId] , currentIndexList , currentIndex , rowCount );
  }
  outputBatch.size = rowCount;
  currentIndex += rowCount;

  final boolean listExhausted = indexSize <= currentIndex;
  if( listExhausted && ! currentReader.hasNext() ){
    outputBatch.endOfFile = true;
  }
  return 0 < outputBatch.size;
}
示例4: writeRootBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Writes {@code length} rows of {@code batch}, starting at {@code offset},
 * by forwarding column {@code i} of the batch to child writer {@code i}.
 *
 * @param batch  the batch whose columns are written
 * @param offset index of the first row to write
 * @param length number of rows to write
 * @throws IOException if a child writer fails
 */
@Override
void writeRootBatch(VectorizedRowBatch batch, int offset, int length) throws IOException {
  // Root-column statistics are bumped once for the whole run of rows.
  indexStatistics.increment(length);
  // The root column is assumed to be non-nullable, so isPresent is left alone.
  int child = 0;
  while (child < childrenWriters.length) {
    childrenWriters[child].writeBatch(batch.cols[child], offset, length);
    child++;
  }
}
示例5: fillRows
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Copies the selected columns of an ORC batch into an array of rows.
 *
 * <p>The number of rows copied is the smaller of {@code batch.count()} and
 * {@code rows.length}. The i-th entry of {@code selectedFields} names the ORC
 * column that is read into field position i of each row.
 *
 * @param rows           the rows to fill
 * @param schema         the ORC schema; its children give the column types
 * @param batch          the ORC batch to read from
 * @param selectedFields indexes of the ORC columns to read
 * @return the number of rows that were filled
 */
static int fillRows(Row[] rows, TypeDescription schema, VectorizedRowBatch batch, int[] selectedFields) {
  final int limit = Math.min((int) batch.count(), rows.length);
  final List<TypeDescription> childTypes = schema.getChildren();
  int targetField = 0;
  for (int orcColumn : selectedFields) {
    readField(rows, targetField, childTypes.get(orcColumn), batch.cols[orcColumn], null, limit);
    targetField++;
  }
  return limit;
}
示例6: processRow
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Emits one row of {@code batch} to {@code writer}.
 *
 * <p>For a STRUCT schema the row is written as an object with one key per
 * batch column, keyed by the schema's field names. For any other schema only
 * the first column's value is written.
 *
 * @param writer the JSON writer receiving the output
 * @param batch  the batch holding the row
 * @param schema the schema describing {@code batch}
 * @param row    index of the row inside {@code batch}
 * @throws JSONException if the writer rejects the output
 */
public static void processRow(JSONWriter writer, VectorizedRowBatch batch,
    TypeDescription schema, int row) throws JSONException {
  if (schema.getCategory() != TypeDescription.Category.STRUCT) {
    setValue(writer, batch.cols[0], schema, row);
    return;
  }
  List<TypeDescription> childTypes = schema.getChildren();
  List<String> childNames = schema.getFieldNames();
  writer.object();
  int column = 0;
  while (column < batch.cols.length) {
    writer.key(childNames.get(column));
    setValue(writer, batch.cols[column], childTypes.get(column), row);
    column++;
  }
  writer.endObject();
}
示例7: fillRow
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Populates row {@code rowIndex} of {@code batch} from a JSON object.
 *
 * <p>For each converter, the member named after the corresponding schema
 * field is fetched from {@code data}. When {@code data.get} returns
 * {@code null} the column is marked null for that row; otherwise the
 * converter writes the value into the column.
 *
 * @param rowIndex   the row of the batch to fill
 * @param converters one converter per column
 * @param schema     supplies the field names, in column order
 * @param batch      the batch to write into
 * @param data       the JSON object providing the values
 */
public static void fillRow(int rowIndex, JsonConverter[] converters,
    TypeDescription schema, VectorizedRowBatch batch, JsonObject data) {
  List<String> names = schema.getFieldNames();
  for (int col = 0; col < converters.length; col++) {
    JsonElement value = data.get(names.get(col));
    if (value != null) {
      converters[col].convert(value, batch.cols[col], rowIndex);
      continue;
    }
    // Missing member: flag the column as containing nulls and null this row.
    batch.cols[col].noNulls = false;
    batch.cols[col].isNull[rowIndex] = true;
  }
}
示例8: createValue
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Creates the batch object used as the reader's value.
 *
 * @return the batch produced by {@code setting.createVectorizedRowBatch()}
 */
@Override
public VectorizedRowBatch createValue() {
  final VectorizedRowBatch freshBatch = setting.createVectorizedRowBatch();
  return freshBatch;
}
示例9: createVectorizedRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Builds a row batch by delegating to {@code rbCtx}, passing
 * {@code projectionColumn}.
 *
 * @return the batch returned by {@code rbCtx.createVectorizedRowBatch}
 */
@Override
public VectorizedRowBatch createVectorizedRowBatch(){
  final VectorizedRowBatch projected = rbCtx.createVectorizedRowBatch( projectionColumn );
  return projected;
}
示例10: setPartitionValues
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Adds the partition column values to {@code outputBatch} via {@code rbCtx}.
 * Does nothing when {@code partitionValues} is empty.
 *
 * @param outputBatch the batch to receive the partition columns
 */
@Override
public void setPartitionValues( final VectorizedRowBatch outputBatch ){
  if( partitionValues.length <= 0 ){
    return;
  }
  rbCtx.addPartitionColsToBatch( outputBatch , partitionValues );
}
示例11: flush
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Writes every row of {@code segment} into a new ORC file at {@code path}.
 *
 * <p>Each row is a {@code String[]} whose i-th element is stored, as UTF-8
 * bytes, in column i of the batch (columns are cast to
 * {@link BytesColumnVector}). A batch is handed to the writer once it is
 * full, and any partially filled batch is written after the last row. The
 * writer is closed on success and on failure. On success the segment's file
 * path is set to {@code path}; this now also happens for an empty segment,
 * which produces a file with no rows.
 *
 * @param segment the source of rows
 * @param path    destination path of the ORC file
 * @param schema  the ORC schema used to create the writer and the row batch
 * @return {@code true} if the file was written and closed, {@code false} if
 *         an {@link IOException} occurred
 */
private boolean flush(BufferSegment segment, String path, TypeDescription schema)
{
    Configuration conf = new Configuration();
    try {
        Writer writer = OrcFile.createWriter(new Path(path),
                OrcFile.writerOptions(conf)
                        .setSchema(schema)
                        .stripeSize(orcFileStripeSize)
                        .bufferSize(orcFileBufferSize)
                        .blockSize(orcFileBlockSize)
                        .compress(CompressionKind.ZLIB)
                        .version(OrcFile.Version.V_0_12));
        try {
            VectorizedRowBatch batch = schema.createRowBatch();
            while (segment.hasNext()) {
                String[] contents = segment.getNext();
                int rowCount = batch.size++;
                System.out.println("contents.length : " + contents.length);
                for (int i = 0; i < contents.length; i++) {
                    // ORC string columns hold UTF-8; do not depend on the platform charset.
                    ((BytesColumnVector) batch.cols[i]).setVal(rowCount,
                            contents[i].getBytes(java.nio.charset.StandardCharsets.UTF_8));
                }
                // Check for a full batch only after the whole row is populated,
                // so a row is never split across two batches.
                if (batch.size == batch.getMaxSize()) {
                    writer.addRowBatch(batch);
                    batch.reset();
                }
            }
            // Write the rows left over in the final, partially filled batch.
            if (batch.size != 0) {
                writer.addRowBatch(batch);
                batch.reset();
            }
        }
        finally {
            // Close exactly once, after all rows, and also when a write fails.
            writer.close();
        }
        segment.setFilePath(path);
        System.out.println("path : " + path);
        return true;
    }
    catch (IOException e) {
        e.printStackTrace();
        return false;
    }
}
示例12: addRowBatch
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Adds a batch of rows by calling {@code flushInternalBatch()} first and then
 * delegating to the superclass implementation.
 *
 * <p>NOTE(review): presumably the flush keeps rows buffered earlier by this
 * writer ahead of {@code batch} in the output order; confirm against
 * {@code flushInternalBatch()}.
 *
 * @param batch the rows to pass to {@code super.addRowBatch}
 * @throws IOException declared for the flush and the superclass write
 */
@Override
public void addRowBatch(VectorizedRowBatch batch) throws IOException {
flushInternalBatch();
super.addRowBatch(batch);
}
示例13: serializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Vectorized serialization is not supported: this method always throws.
 *
 * @param vrg          ignored
 * @param objInspector ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 * @throws SerDeException declared by the signature; not thrown by this body
 */
@Override
public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector) throws SerDeException {
throw new UnsupportedOperationException("serializeVector not supported");
}
示例14: deserializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * Intentionally a no-op: the body is empty and none of the arguments are
 * read or modified.
 *
 * @param rowBlob    ignored
 * @param rowsInBlob ignored
 * @param reuseBatch ignored; left untouched
 * @throws SerDeException declared by the signature; not thrown by this body
 */
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
// nothing to do here
}
示例15: deserializeVector
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; //导入依赖的package包/类
/**
 * VectorizedSerde method. Intentionally a no-op: the body is empty and none
 * of the arguments are read or modified.
 *
 * @param rowBlob    ignored
 * @param rowsInBlob ignored
 * @param reuseBatch ignored; left untouched
 * @throws SerDeException declared by the signature; not thrown by this body
 */
@Override
public void deserializeVector(Object rowBlob, int rowsInBlob, VectorizedRowBatch reuseBatch) throws SerDeException {
// nothing to do here
}