

Java ColumnProjectionUtils Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.hive.serde2.ColumnProjectionUtils. If you are wondering what the ColumnProjectionUtils class is for, or how to use it in practice, the curated examples below should help.


The ColumnProjectionUtils class belongs to the org.apache.hadoop.hive.serde2 package. It is the utility Hive components use to record a column projection, that is, the ids and names of the columns a reader should actually materialize, in the job Configuration. Fifteen code examples are shown below, sorted by popularity by default.
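Before the examples, here is a minimal sketch of the round trip this class supports, using only the methods and constants that appear in the examples below. The driver class itself is hypothetical, and the noted READ_ALL_COLUMNS behaviour is an assumption about recent Hive versions:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

// Hypothetical driver, only to show how a projection travels through the Configuration.
public class ColumnProjectionDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Producer side: request only columns 0 and 2 (in recent Hive versions this
    // also flips READ_ALL_COLUMNS to false).
    ColumnProjectionUtils.appendReadColumns(conf, Arrays.asList(0, 2));

    // Consumer side: recover the projection and decide which columns to skip.
    boolean readAll = ColumnProjectionUtils.isReadAllColumns(conf);
    List<Integer> ids = ColumnProjectionUtils.getReadColumnIDs(conf);
    System.out.println("read all columns: " + readAll);  // false
    System.out.println("projected column ids: " + ids);  // [0, 2]
  }
}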

Example 1: initialize
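From the MDS (multiple-dimension-spread) SerDe: the projected column names are read from ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, and the row StructTypeInfo is built from all table columns when the projection is empty, or from just the projected ones otherwise.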

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Override
public void initialize( final Configuration conf, final Properties table , final Properties part ) throws SerDeException{
  LOG.info( table.toString() );
  if( part != null ){
    LOG.info( part.toString() );
  }
  String columnNameProperty = table.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = table.getProperty(serdeConstants.LIST_COLUMN_TYPES);

  String projectionColumnNames = conf.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , "" );

  StructTypeInfo rootType;
  if( projectionColumnNames.isEmpty() ){
    rootType = getAllReadTypeInfo( columnNameProperty , columnTypeProperty );
  }
  else{
    rootType = getColumnProjectionTypeInfo( columnNameProperty , columnTypeProperty , projectionColumnNames );
  }

  inspector = MDSObjectInspectorFactory.craeteObjectInspectorFromTypeInfo( rootType );
}
 
Author: yahoojapan, Project: multiple-dimension-spread, Lines: 22, Source: MDSSerde.java

Example 2: HiveVectorizedReaderSetting
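Vectorized reader setup for the same project: the column ids returned by ColumnProjectionUtils.getReadColumnIDs(job) are converted into a boolean projection mask plus a per-column set of vector assignors for the row batch.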

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
public HiveVectorizedReaderSetting( final FileSplit split , final JobConf job , final HiveReaderSetting hiveReaderConfig ) throws IOException{
  this.hiveReaderConfig = hiveReaderConfig;

  rbCtx = Utilities.getVectorizedRowBatchCtx( job );
  partitionValues = new Object[rbCtx.getPartitionColumnCount()];
  if( 0 < partitionValues.length ){
    rbCtx.getPartitionValues( rbCtx, job, split, partitionValues );
  }

  TypeInfo[] typeInfos = rbCtx.getRowColumnTypeInfos();
  columnNames = rbCtx.getRowColumnNames();
  needColumnIds = createNeedColumnId( ColumnProjectionUtils.getReadColumnIDs( job ) );

  projectionColumn = new boolean[columnNames.length];
  assignors = new IColumnVectorAssignor[columnNames.length];
  for( int id : needColumnIds ){
    projectionColumn[id] = true;
    assignors[id] = ColumnVectorAssignorFactory.create( typeInfos[id] );
  }
}
 
Author: yahoojapan, Project: multiple-dimension-spread, Lines: 21, Source: HiveVectorizedReaderSetting.java

Example 3: readAndAssertOnEmptyCols
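A test helper that scans a split with MonarchRecordReader and uses ColumnProjectionUtils.getReadColumnIDs(conf) to verify that every column outside the projection comes back with zero length.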

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
  /**
   * Read using record reader and assert that the columns not requested have 0 length.
   *
   * @param conf       the reader configuration -- must have the region name
   * @param split      the input-split containing the records to be read
   * @param predicates the predicates to filter out unwanted results
   * @param readColIds the column ids to retrieve
   * @return total number of records read
   */
  private long readAndAssertOnEmptyCols(final Configuration conf, final InputSplit split,
                                        final String readColIds, final Filter[] predicates) throws IOException {
    MonarchRecordReader mrr = new MonarchRecordReader(conf);
    FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    if (predicates != null) {
      for (int i = 0; i < predicates.length; i++) {
        filterList.addFilter(predicates[i]);
      }
      mrr.pushDownfilters = filterList;
    }
//    mrr.readColIds = readColIds;

    /*List<Integer> readColIdList = readColIds == null ? Collections.emptyList() :
      Arrays.stream(readColIds.split(",")).mapToInt(Integer::valueOf)
        .collect(ArrayList::new, ArrayList::add, ArrayList::addAll);*/
    List<Integer> readColIdList = ColumnProjectionUtils.getReadColumnIDs(conf);
    long size = 0;
    try {
      mrr.initialize(split, conf);
      Writable key = mrr.createKey();
      Writable value = mrr.createValue();
      while (mrr.next(key, value)) {
        BytesRefArrayWritable braw = (BytesRefArrayWritable) value;
        /** assert that skipped (not read) columns have 0 length **/
        for (int i = 0; i < braw.size(); i++) {
          if (!readColIdList.isEmpty() && !readColIdList.contains(i)) {
            assertEquals(0, braw.get(i).getLength());
          }
        }
        ++size;
      }
      mrr.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
    return size;
  }
 
Author: ampool, Project: monarch, Lines: 47, Source: MonarchRecordReaderTest.java

Example 4: getIncludeColumns
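Resolves the projection against an IndexR segment schema: when ColumnProjectionUtils.isReadAllColumns(conf) is true (or no names were recorded) every column is included; otherwise READ_COLUMN_NAMES_CONF_STR is split on commas and each name is matched case-insensitively, with unresolved columns marked by a -1 id.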

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
private void getIncludeColumns(Configuration conf, Segment segment) {
    List<ColumnSchema> segColSchemas = segment.schema().getColumns();
    String columnNamesStr = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
    if (ColumnProjectionUtils.isReadAllColumns(conf) ||
            columnNamesStr == null) {
        projectCols = new ColumnSchema[segColSchemas.size()];
        projectColIds = new int[segColSchemas.size()];
        for (int i = 0; i < segColSchemas.size(); i++) {
            projectCols[i] = segColSchemas.get(i);
            projectColIds[i] = i;
        }
    } else {
        String[] ss = Strings.isEmpty(columnNamesStr.trim()) ? new String[]{} : columnNamesStr.split(",");
        projectCols = new ColumnSchema[ss.length];
        projectColIds = new int[ss.length];
        for (int i = 0; i < ss.length; i++) {
            String col = ss[i];
            int colId = Trick.indexFirst(segColSchemas, c -> c.getName().equalsIgnoreCase(col));
            //Preconditions.checkState(colId >= 0, String.format("Column [%s] not found in segment [%s]", col, segment.name()));
            if (colId < 0) {
                projectCols[i] = null;
                projectColIds[i] = -1;
            } else {
                projectCols[i] = segColSchemas.get(colId);
                projectColIds[i] = colId;
            }
        }
    }
}
 
Author: shunfei, Project: indexr, Lines: 30, Source: IndexRRecordReader.java

Example 5: initProperties
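Builds the SerDe properties for an HDFS import and, when the output columns are a strict subset of the table columns, pushes the projection down via ColumnProjectionUtils.appendReadColumns(conf, ...).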

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
private static void initProperties(
        Properties props,
        Configuration conf,
        List<HCatTableColumn> columns,
        List<OutputColumnSpec> outputColumns) throws Exception {
    String colNames = "";
    String colTypes = "";
    for (HCatTableColumn colInfo : columns) {
        colNames += colInfo.getName() + ",";
        colTypes += colInfo.getDataType() + ",";
    }
    if (colNames.length() > 0)
        colNames = colNames.substring(0, colNames.length() - 1);
    if (colTypes.length() > 0)
        colTypes = colTypes.substring(0, colTypes.length() - 1);
    props.put(serdeConstants.LIST_COLUMNS, colNames);
    props.put(serdeConstants.LIST_COLUMN_TYPES, colTypes);
    props.put(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    // Fix for Avro (NullPointerException if null)
    if (props.getProperty("columns.comments") == null) {
        props.put("columns.comments", "");
    }
    // Pushdown projection if we don't need all columns
    Set<Integer> requiredColumns = new HashSet<>();
    for (OutputColumnSpec spec : outputColumns) {
        if (spec.getColumnPosition() < columns.size()) {
            requiredColumns.add(spec.getColumnPosition());
        }
    }
    if (requiredColumns.size() < columns.size()) {
        ColumnProjectionUtils.appendReadColumns(conf, new ArrayList<>(requiredColumns));
    }
}
 
Author: EXASOL, Project: hadoop-etl-udfs, Lines: 34, Source: HdfsSerDeImportService.java

Example 6: setReadColumns
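Intersects the requested struct fields with the actual ORC schema, checks that the field types match, and registers the surviving ids and names through ColumnProjectionUtils.appendReadColumns(conf, ids, names).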

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
/**
 * Sets which fields are to be read from the ORC file
 */
static void setReadColumns(Configuration conf, StructTypeInfo actualStructTypeInfo) {
  StructTypeInfo readStructTypeInfo = getTypeInfo(conf);
  LOG.info("Read StructTypeInfo: {}", readStructTypeInfo);

  List<Integer> ids = new ArrayList<>();
  List<String> names = new ArrayList<>();

  List<String> readNames = readStructTypeInfo.getAllStructFieldNames();
  List<String> actualNames = actualStructTypeInfo.getAllStructFieldNames();

  for (int i = 0; i < actualNames.size(); i++) {
    String actualName = actualNames.get(i);
    if (readNames.contains(actualName)) {
      // make sure they are the same type
      TypeInfo actualTypeInfo = actualStructTypeInfo.getStructFieldTypeInfo(actualName);
      TypeInfo readTypeInfo = readStructTypeInfo.getStructFieldTypeInfo(actualName);
      if (!actualTypeInfo.equals(readTypeInfo)) {
        throw new IllegalStateException("readTypeInfo [" + readTypeInfo + "] does not match actualTypeInfo ["
            + actualTypeInfo + "]");
      }
      // mark the column as to-be-read
      ids.add(i);
      names.add(actualName);
    }
  }
  if (ids.size() == 0) {
    throw new IllegalStateException("None of the selected columns were found in the ORC file.");
  }
  LOG.info("Set column projection on columns: {} ({})", ids, names);
  ColumnProjectionUtils.appendReadColumns(conf, ids, names);
}
 
Author: HotelsDotCom, Project: corc, Lines: 35, Source: CorcInputFormat.java

Example 7: setInputReadColumnProjection
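A unit test asserting that CorcInputFormat.setReadColumns switches READ_ALL_COLUMNS off and records the projected column name ("a") and id ("0") in the configuration.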

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Test
public void setInputReadColumnProjection() {
  StructTypeInfo typeInfo = new StructTypeInfoBuilder()
      .add("a", TypeInfoFactory.stringTypeInfo)
      .add("b", TypeInfoFactory.longTypeInfo)
      .build();

  conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string>");

  CorcInputFormat.setReadColumns(conf, typeInfo);

  assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false));
  assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), is("a"));
  assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0"));
}
 
Author: HotelsDotCom, Project: corc, Lines: 16, Source: CorcInputFormatTest.java

Example 8: setInputReadColumnsAll
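The all-columns companion test: even when every field is requested, READ_ALL_COLUMNS is still set to false and both columns appear in the name ("a,b") and id ("0,1") lists.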

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Test
public void setInputReadColumnsAll() {
  StructTypeInfo typeInfo = new StructTypeInfoBuilder()
      .add("a", TypeInfoFactory.stringTypeInfo)
      .add("b", TypeInfoFactory.longTypeInfo)
      .build();

  conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string,b:bigint>");

  CorcInputFormat.setReadColumns(conf, typeInfo);

  assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false));
  assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), is("a,b"));
  assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0,1"));
}
 
Author: HotelsDotCom, Project: corc, Lines: 16, Source: CorcInputFormatTest.java

Example 9: setInputReadColumnsMissing
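Covers the case where the requested schema names a field ("c") that the actual schema lacks: the missing field is simply ignored and only the ids of the columns that exist ("0,1") are registered.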

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Test
public void setInputReadColumnsMissing() {
  StructTypeInfo typeInfo = new StructTypeInfoBuilder()
      .add("a", TypeInfoFactory.stringTypeInfo)
      .add("b", TypeInfoFactory.longTypeInfo)
      .build();

  conf.set(CorcInputFormat.INPUT_TYPE_INFO, "struct<a:string,b:bigint,c:string>");

  CorcInputFormat.setReadColumns(conf, typeInfo);

  assertThat(conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true), is(false));
  assertThat(conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), is("0,1"));
}
 
Author: HotelsDotCom, Project: corc, Lines: 15, Source: CorcInputFormatTest.java

Example 10: init
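Parquet's Hive read support: the table schema is rebuilt from IOConstants.COLUMNS and then narrowed to the ids returned by ColumnProjectionUtils.getReadColumnIDs(configuration) to form the schema actually requested from the file.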

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
/**
 * Creates the Parquet-side read context with the requested schema during the init phase.
 *
 * @param configuration needed to get the wanted columns
 * @param keyValueMetaData unused
 * @param fileSchema parquet file schema
 * @return the parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(final Configuration configuration,
    final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
  final String columns = configuration.get(IOConstants.COLUMNS);
  final Map<String, String> contextMetadata = new HashMap<String, String>();
  if (columns != null) {
    final List<String> listColumns = getColumns(columns);

    final List<Type> typeListTable = new ArrayList<Type>();
    for (final String col : listColumns) {
      // listColumns contains partition columns which are metadata only
      if (fileSchema.containsField(col)) {
        typeListTable.add(fileSchema.getType(col));
      } else {
        // below allows schema evolution
        typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
      }
    }
    MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
    contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());

    MessageType requestedSchemaByUser = tableSchema;
    final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);

    final List<Type> typeListWanted = new ArrayList<Type>();
    for (final Integer idx : indexColumnsWanted) {
      typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
    }
    requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
            typeListWanted), fileSchema, configuration);

    return new ReadContext(requestedSchemaByUser, contextMetadata);
  } else {
    contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }
}
 
Author: apache, Project: parquet-mr, Lines: 47, Source: DataWritableReadSupport.java

Example 11: setLocation
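A Pig ORC loader that propagates the required columns by setting READ_ALL_COLUMNS to false and filling READ_COLUMN_IDS_CONF_STR, also populating READ_COLUMN_NAMES_CONF_STR to work around a setSearchArgument quirk.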

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Override
public void setLocation(String location, Job job) throws IOException {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    if (!UDFContext.getUDFContext().isFrontend()) {
        typeInfo = (TypeInfo)ObjectSerializer.deserialize(p.getProperty(signature + SchemaSignatureSuffix));
    } else if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    if (typeInfo != null && oi == null) {
        oi = OrcStruct.createObjectInspector(typeInfo);
    }
    if (!UDFContext.getUDFContext().isFrontend()) {
        if (p.getProperty(signature + RequiredColumnsSuffix) != null) {
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p
                    .getProperty(signature + RequiredColumnsSuffix));
            job.getConfiguration().setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
                    getReqiredColumnIdString(mRequiredColumns));
            if (p.getProperty(signature + SearchArgsSuffix) != null) {
                // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
                job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                        getReqiredColumnNamesString(getSchema(location, job), mRequiredColumns));
            }
        } else if (p.getProperty(signature + SearchArgsSuffix) != null) {
            // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                    getReqiredColumnNamesString(getSchema(location, job)));
        }
        if (p.getProperty(signature + SearchArgsSuffix) != null) {
            job.getConfiguration().set(SARG_PUSHDOWN, p.getProperty(signature + SearchArgsSuffix));
        }

    }
    FileInputFormat.setInputPaths(job, location);
}
 
Author: sigmoidanalytics, Project: spork, Lines: 36, Source: OrcStorage.java

Example 12: findIncludedColumns
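Translates the flat ids from ColumnProjectionUtils.getReadColumnIDs(conf) into a boolean array over the ORC type tree, always including the root and recursing into subtypes; null is returned when no column is filtered out.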

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
/**
 * Take the configuration and figure out which columns we need to include.
 * @param types the types of the file
 * @param conf the configuration
 * @return true for each column that should be included
 */
private static boolean[] findIncludedColumns(List<OrcProto.Type> types,
                                             Configuration conf) {
  String includedStr =
      conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
  if (includedStr == null || includedStr.trim().length() == 0) {
    return null;
  } else {
    int numColumns = types.size();
    boolean[] result = new boolean[numColumns];
    result[0] = true;
    OrcProto.Type root = types.get(0);
    List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
    for(int i=0; i < root.getSubtypesCount(); ++i) {
      if (included.contains(i)) {
        includeColumnRecursive(types, result, root.getSubtypes(i));
      }
    }
    // if we are filtering at least one column, return the boolean array
    for(boolean include: result) {
      if (!include) {
        return result;
      }
    }
    return null;
  }
}
 
Author: facebookarchive, Project: hive-dwrf, Lines: 33, Source: OrcInputFormat.java

Example 13: HiveReaderSetting
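Builds the MDS reader settings from the Hive job: filter expressions are deserialized for push-down, table properties are forwarded, and the projection travels on by copying READ_COLUMN_NAMES_CONF_STR into the reader's own spread.reader.read.column.names key.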

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
public HiveReaderSetting( final FileSplit split, final JobConf job ){
  config = new Configuration();

  disableSkipBlock = job.getBoolean( "mds.disable.block.skip" , false );
  disableFilterPushdown = job.getBoolean( "mds.disable.filter.pushdown" , false );

  Set<String> pathNameSet = createPathSet( split.getPath() );
  List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
  String filterExprSerialized = job.get( TableScanDesc.FILTER_EXPR_CONF_STR );
  if( filterExprSerialized != null ){
    filterExprs.add( SerializationUtilities.deserializeExpression(filterExprSerialized) );
  }

  MapWork mapWork;
  try{
    mapWork = Utilities.getMapWork(job);
  }catch( Exception e ){
    mapWork = null;
  }

  if( mapWork == null ){
    node = createExpressionNode( filterExprs );
    isVectorModeFlag = false;
    return;
  }

  node = createExpressionNode( filterExprs );

  for( Map.Entry<String,PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet() ){
    if( ! pathNameSet.contains( pathsAndParts.getKey() ) ){
      continue;
    }
    Properties props = pathsAndParts.getValue().getTableDesc().getProperties();
    if( props.containsKey( "mds.expand" ) ){
      config.set( "spread.reader.expand.column" , props.getProperty( "mds.expand" ) );
    }
    if( props.containsKey( "mds.flatten" ) ){
      config.set( "spread.reader.flatten.column" , props.getProperty( "mds.flatten" ) );
    }
  }

  config.set( "spread.reader.read.column.names" , createReadColumnNames( job.get( ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR , null ) ) );

  // Next Hive version:
  // Utilities.getUseVectorizedInputFileFormat(job)
  isVectorModeFlag = Utilities.isVectorMode( job );
}
 
Author: yahoojapan, Project: multiple-dimension-spread, Lines: 48, Source: HiveReaderSetting.java

Example 14: initialize
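Initializes a Monarch record reader whose scan is restricted to ColumnProjectionUtils.getReadColumnIDs(conf); columns referenced by push-down filters are appended to the projection so the filters can still be evaluated.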

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
public void initialize(final InputSplit split, final Configuration conf) throws IOException {
  this.startTime = System.currentTimeMillis();

  /** batch size for reading multiple records together **/
  batchSize = NumberUtils.toInt(conf.get(MonarchUtils.MONARCH_BATCH_SIZE), MonarchUtils.MONARCH_BATCH_SIZE_DEFAULT);
  final MonarchSplit ms = (MonarchSplit) split;

  this.readColIds = ColumnProjectionUtils.getReadColumnIDs(conf);

  final String expression = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (expression != null && columns != null) {
    this.pushDownfilters = getPushDownFilters(expression, columns);
    if (this.pushDownfilters != null) {
      for (Filter mFilter : pushDownfilters.getFilters()) {
        logger.info("Pushing filter= {}", mFilter);
        int id=-1;
        for(int i=0; i< columns.length; i++){
          if(columns[i].equalsIgnoreCase(((SingleColumnValueFilter) mFilter).getColumnNameString())) {
            id =i;
            break;
          }
        }
        if (!readColIds.contains(id) && readColIds.size() > 0 && id != -1) {
          readColIds.add(id);
        }
      }
    }
  }

  /** create the scan with required parameters.. **/
  Scan scan = new Scan();
  scan.setBucketIds(ms.getBucketIds());
  scan.setBatchSize(batchSize);
  scan.setReturnKeysFlag(false);
  final String str = conf.get(MonarchUtils.READ_FILTER_ON_LATEST_VERSION);
  if (str != null) {
    scan.setFilterOnLatestVersionOnly(Boolean.getBoolean(str));
  }
  final boolean isOldestFirst = Boolean.getBoolean(conf.get(MonarchUtils.READ_OLDEST_FIRST));
  final int maxVersions = NumberUtils.toInt(conf.get(MonarchUtils.READ_MAX_VERSIONS), 1);
  scan.setMaxVersions(maxVersions, isOldestFirst);

  if (pushDownfilters != null) {
    scan.setFilter(pushDownfilters);
  }

  scan.setBucketToServerMap(ms.getBucketToServerMap());
  Collections.sort(this.readColIds);

  if (! readColIds.isEmpty()) {
    scan.setColumns(readColIds);
  }
  logger.info("Retrieving columns= {}", scan.getColumns());

  this.mResultScanner = this.anyTable.getScanner(scan);
  this.valueIterator = this.mResultScanner.iterator();

  if (logger.isDebugEnabled()) {
    logger.debug("{} - Initialize MonarchRecordReader: batchSize= {}, split= {}", new Date(), batchSize, ms);
  }
}
 
Author: ampool, Project: monarch, Lines: 62, Source: MonarchRecordReader.java

Example 15: initialize
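A columnar SerDe that, unless ColumnProjectionUtils.isReadAllColumns(conf) holds, deserializes only the columns listed by getReadColumnIDs(conf) and leaves the remaining fields lazy.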

import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  String[] cols = tbl.getProperty("columns").split(",");
  String types = tbl.getProperty("columns.types");
  if (types == null) {
    types = Collections.nCopies(cols.length, "string").stream().collect(Collectors.joining(","));
  }

  this.columnList = Arrays.asList(cols);
  this.typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(types);

  /** initialize storage for fields **/
  int size = columnList.size();
  field = new BytesRefWritable[size];
  for (int i = 0; i < size; i++) {
    field[i] = new BytesRefWritable();
    serializeCache.set(i, field[i]);
  }
  serializedSize = 0;

  /** the columns to skip **/
  List notSkipIDs = new ArrayList();
  if(conf != null && !ColumnProjectionUtils.isReadAllColumns(conf)) {
    notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(conf);
  } else {
    for(int i = 0; i < typeInfoList.size(); ++i) {
      notSkipIDs.add(i);
    }
  }

  /**
   * create the object inspector for row.. use native Java object inspectors for
   * the objects for which deserialization is done by us and not Hive.
   * Cache Monarch object types as well.. for all rows (serialize)..
   */
  List<ObjectInspector> oiList = new ArrayList<>(columnList.size());
  this.objectTypeList = new ArrayList<>(columnList.size());
  for (final TypeInfo ti : typeInfoList) {
    DataType type = null;
    try {
      type = MonarchPredicateHandler.getMonarchFieldType(ti.getTypeName());
    } catch (Exception e) {
      //
    }
    if (type != null) {
      oiList.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(ti));
    } else {
      oiList.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(ti));
    }
    this.objectTypeList.add(type);
  }
  this.rowOI = ObjectInspectorFactory.getColumnarStructObjectInspector(columnList, oiList);

  /** Initialize the lazy structure for on-demand de-serialization **/
  this.cachedLazyStruct = new MonarchColumnarStruct(rowOI, notSkipIDs);
}
 
Author: ampool, Project: monarch, Lines: 58, Source: MonarchSerDe.java


Note: the org.apache.hadoop.hive.serde2.ColumnProjectionUtils examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and distribution and use are governed by each project's License. Please do not reproduce without permission.