

Java OrcSerde Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.io.orc.OrcSerde. If you are wondering what OrcSerde is for, how to use it, or what real-world code that uses it looks like, the curated examples below should help.


The OrcSerde class belongs to the org.apache.hadoop.hive.ql.io.orc package. Thirteen code examples are presented below, sorted by popularity by default.
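Before the examples, here is a minimal self-contained sketch of the typical OrcSerde round trip. This sketch is our illustration, not taken from the projects below; the class name OrcSerdeSketch is invented, and it assumes a Hive version in which OrcSerde.initialize(Configuration, Properties) is the initialization entry point, as used in Example 1. The pattern: declare the schema through table properties, then serialize a row via a matching ObjectInspector.

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;

public class OrcSerdeSketch {
  public static void main(String[] args) throws Exception {
    // Declare a two-column schema the same way Example 1 does,
    // via the "columns" and "columns.types" table properties.
    OrcSerde serde = new OrcSerde();
    Properties table = new Properties();
    table.setProperty(serdeConstants.LIST_COLUMNS, "id,msg");
    table.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    serde.initialize(new Configuration(), table);

    // Build a struct inspector matching the schema and fill one row.
    StandardStructObjectInspector inspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("id", "msg"),
            Arrays.asList(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    Object row = inspector.create();
    inspector.setStructFieldData(row, inspector.getAllStructFieldRefs().get(0), 1);
    inspector.setStructFieldData(row, inspector.getAllStructFieldRefs().get(1), "hello");

    // The resulting Writable is what an ORC RecordWriter expects
    // (see Examples 2 and 3 below).
    Writable record = serde.serialize(row, inspector);
    System.out.println(record.getClass().getName());
  }
}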

Example 1: getFromTypeInfo

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public static OrcSerde getFromTypeInfo( final Configuration config , final TypeInfo typeInfo )throws IOException{
  if( !( typeInfo instanceof StructTypeInfo ) ){
    throw new IOException( "Input type info is not StructTypeInfo : " + typeInfo.toString() );
  }
  ObjectInspector objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( typeInfo );

  // Build the comma-separated column-name and column-type lists from the struct fields.
  String columnsName = "";
  String columnsType = "";
  List<TypeInfo> typeInfoList = ( (StructTypeInfo)typeInfo ).getAllStructFieldTypeInfos();
  List<StructField> structField = (List<StructField>)( ( (StructObjectInspector)objectInspector ).getAllStructFieldRefs() );
  for( int i = 0 ; i < structField.size() ; i++ ){
    if( ! columnsName.isEmpty() ){
      columnsName = columnsName.concat( "," );
      columnsType = columnsType.concat( "," );
    }
    columnsName = columnsName.concat( structField.get(i).getFieldName() );
    columnsType = columnsType.concat( typeInfoList.get(i).toString() );
  }

  // Hand the schema to the serde through the standard table properties.
  OrcSerde serde = new OrcSerde();
  Properties table = new Properties();
  table.setProperty( serdeConstants.LIST_COLUMNS , columnsName );
  table.setProperty( serdeConstants.LIST_COLUMN_TYPES , columnsType );
  serde.initialize( config , table );

  return serde;
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 27, Source: OrcSerdeFactory.java
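This factory builds exactly the two table properties an OrcSerde needs — serdeConstants.LIST_COLUMNS ("columns") and serdeConstants.LIST_COLUMN_TYPES ("columns.types") — by walking the struct's fields. Examples 10 and 11 below are thin convenience wrappers that derive the TypeInfo from a Hive type string or from an existing ObjectInspector before delegating here.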

Example 2: createMultiStripeFile

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);

    @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);

    // Write the values 0, 3, 6, ..., 297; flush every 20 rows to start a new stripe.
    for (int i = 0; i < 300; i += 3) {
        if ((i > 0) && (i % 60 == 0)) {
            flushWriter(writer);
        }

        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }

    writer.close(false);
}
 
Developer: y-lan, Project: presto, Lines: 23, Source: TestOrcReaderPositions.java
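The loop writes 100 rows (0, 3, ..., 297) and flushes the writer after every 20 rows, so the resulting file spans several stripes — which is the point of a test that exercises reader positioning.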

Example 3: createSequentialFile

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
private static void createSequentialFile(File file, int count)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);

    @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);

    // Write the values 0 .. count-1 into a single bigint column, in order.
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }

    writer.close(false);
}
 
Developer: y-lan, Project: presto, Lines: 19, Source: TestOrcReaderPositions.java

Example 4: configure

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
@Override
public void configure() {
    super.configure();
    this.orcSerde = new OrcSerde();
    this.outputFormat = new OrcOutputFormat();

    this.columnTypeList = Lists.newArrayList();
    for(String columnType : columnTypes) {
        this.columnTypeList.add(HdfsUtil.columnTypeToObjectInspetor(columnType));
    }
    this.inspector = ObjectInspectorFactory
            .getStandardStructObjectInspector(this.columnNames, this.columnTypeList);

    // Map the configured compression name to a Hadoop codec class.
    Class<? extends CompressionCodec> codecClass = null;
    if(CompressEnum.NONE.name().equalsIgnoreCase(compress)){
        codecClass = null;
    } else if(CompressEnum.GZIP.name().equalsIgnoreCase(compress)){
        codecClass = org.apache.hadoop.io.compress.GzipCodec.class;
    } else if (CompressEnum.BZIP2.name().equalsIgnoreCase(compress)) {
        codecClass = org.apache.hadoop.io.compress.BZip2Codec.class;
    } else if(CompressEnum.SNAPPY.name().equalsIgnoreCase(compress)) {
        // TODO: enable once requirements are settled; the user must install SnappyCodec.
        codecClass = org.apache.hadoop.io.compress.SnappyCodec.class;
    } else {
        throw new IllegalArgumentException("Unsupported compress format: " + compress);
    }

    if(codecClass != null){
        this.outputFormat.setOutputCompressorClass(jobConf, codecClass);
    }
}
 
Developer: DTStack, Project: jlogstash-output-plugin, Lines: 32, Source: HdfsOrcOutputFormat.java
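Note that this maps the compression setting onto a Hadoop codec class at the OutputFormat level. ORC also has its own internal compression, controlled by the orc.compress table property (set to NONE in Example 6), which is the more usual knob for compressing ORC data.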

Example 5: getObjectInspector

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public static ObjectInspector getObjectInspector( final OrcSerde serde )throws IOException{
  try{
    return serde.getObjectInspector();
  }catch( SerDeException e ){
    throw new IOException( e );
  }
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 8, Source: OrcSerdeFactory.java

Example 6: createTable

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public void createTable() throws TException {
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

    LOG.info("HIVE: Dropping hive table: " + propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    hiveClient.dropTable(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY),
            propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY),
            true, true);

    // Define the cols
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("id", Constants.INT_TYPE_NAME, ""));
    cols.add(new FieldSchema("msg", Constants.STRING_TYPE_NAME, ""));

    // Values for the StorageDescriptor
    String location = new File(propertyParser.getProperty(
            ConfigVars.HIVE_TEST_TABLE_LOCATION_KEY)).getAbsolutePath();
    String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
    String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
    int numBuckets = 16;
    Map<String,String> orcProps = new HashMap<String, String>();
    orcProps.put("orc.compress", "NONE");
    SerDeInfo serDeInfo = new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
    List<String> bucketCols = new ArrayList<String>();
    bucketCols.add("id");

    // Build the StorageDescriptor
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setLocation(location);
    sd.setInputFormat(inputFormat);
    sd.setOutputFormat(outputFormat);
    sd.setNumBuckets(numBuckets);
    sd.setSerdeInfo(serDeInfo);
    sd.setBucketCols(bucketCols);
    sd.setSortCols(new ArrayList<Order>());
    sd.setParameters(new HashMap<String, String>());

    // Define the table
    Table tbl = new Table();
    tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY));
    tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    tbl.setSd(sd);
    tbl.setOwner(System.getProperty("user.name"));
    tbl.setParameters(new HashMap<String, String>());
    tbl.setViewOriginalText("");
    tbl.setViewExpandedText("");
    tbl.setTableType(TableType.MANAGED_TABLE.name());
    List<FieldSchema> partitions = new ArrayList<FieldSchema>();
    partitions.add(new FieldSchema("dt", Constants.STRING_TYPE_NAME, ""));
    tbl.setPartitionKeys(partitions);

    // Create the table
    hiveClient.createTable(tbl);

    // Describe the table
    Table createdTable = hiveClient.getTable(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY),
            propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    LOG.info("HIVE: Created Table: " + createdTable.toString());
}
 
Developer: sakserv, Project: storm-topology-examples, Lines: 60, Source: KafkaHiveHdfsTopologyTest.java
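The line to notice for this article is the SerDeInfo: registering OrcSerde.class.getName() as the table's serialization library is what ties the metastore table to the OrcInputFormat and OrcOutputFormat classes set on the StorageDescriptor.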

Example 7: createPageSource

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
@Override
public Optional<? extends ConnectorPageSource> createPageSource(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone)
{
    if (!isOptimizedReaderEnabled(session)) {
        return Optional.empty();
    }

    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    return Optional.of(createOrcPageSource(
            new OrcMetadataReader(),
            configuration,
            path,
            start,
            length,
            columns,
            partitionKeys,
            effectivePredicate,
            hiveStorageTimeZone,
            typeManager,
            getOrcMaxMergeDistance(session),
            getOrcMaxBufferSize(session),
            getOrcStreamBufferSize(session)));
}
 
Developer: y-lan, Project: presto, Lines: 37, Source: OrcPageSourceFactory.java
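The two early returns make this page source opt-in: it only claims a split when the optimized reader is enabled for the session and the table's deserializer is OrcSerde; otherwise the engine falls back to other providers, such as the record-cursor provider in Example 13.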

Example 8: TestPreparer

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public TestPreparer(String tempFilePath)
        throws Exception
{
    OrcSerde serde = new OrcSerde();
    schema = new Properties();
    schema.setProperty("columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    schema.setProperty("columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    schema.setProperty(FILE_INPUT_FORMAT, OrcInputFormat.class.getName());
    schema.setProperty(SERIALIZATION_LIB, serde.getClass().getName());

    partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue()))
            .collect(toList());

    ImmutableList.Builder<HiveColumnHandle> columnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    int nextHiveColumnIndex = 0;
    for (int i = 0; i < testColumns.size(); i++) {
        TestColumn testColumn = testColumns.get(i);
        int columnIndex = testColumn.isPartitionKey() ? -1 : nextHiveColumnIndex++;

        ObjectInspector inspector = testColumn.getObjectInspector();
        HiveType hiveType = HiveType.valueOf(inspector.getTypeName());
        Type type = hiveType.getType(TYPE_MANAGER);

        columnsBuilder.add(new HiveColumnHandle("client_id", testColumn.getName(), hiveType, type.getTypeSignature(), columnIndex, testColumn.isPartitionKey()));
        typesBuilder.add(type);
    }
    columns = columnsBuilder.build();
    types = typesBuilder.build();

    fileSplit = createTestFile(tempFilePath, new OrcOutputFormat(), serde, null, testColumns, NUM_ROWS);
}
 
Developer: y-lan, Project: presto, Lines: 41, Source: TestOrcPageSourceMemoryTracking.java
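The "columns" and "columns.types" properties set on the schema here are the same underlying keys that Example 1 sets through serdeConstants.LIST_COLUMNS and serdeConstants.LIST_COLUMN_TYPES; they are how an OrcSerde learns its schema at initialization time.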

Example 9: getDataFormat

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
 
Developer: apache, Project: tajo, Lines: 39, Source: HiveCatalogUtil.java
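The dispatch is keyed on the serialization library first and the input format second: OrcSerde on its own is enough to identify ORC, whereas LazySimpleSerDe is ambiguous and needs the input format to distinguish TEXT from SEQUENCE_FILE.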

Example 10: getFromString

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public static OrcSerde getFromString( final Configuration config , final String schemaString )throws IOException{
  return getFromTypeInfo( config , TypeInfoUtils.getTypeInfoFromTypeString( schemaString ) );
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 4, Source: OrcSerdeFactory.java

Example 11: getFromObjectInspector

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
public static OrcSerde getFromObjectInspector( final Configuration config , final ObjectInspector ois ) throws IOException{
  return getFromTypeInfo( config , TypeInfoUtils.getTypeInfoFromObjectInspector( ois ) );
}
 
Developer: yahoojapan, Project: dataplatform-schema-lib, Lines: 4, Source: OrcSerdeFactory.java

Example 12: createSerializer

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
private static OrcSerde createSerializer(Configuration conf, Properties properties)
{
    OrcSerde serde = new OrcSerde();
    serde.initialize(conf, properties);
    return serde;
}
 
Developer: y-lan, Project: presto, Lines: 7, Source: OrcFileWriter.java

Example 13: createHiveRecordCursor

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class
@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(
        String clientId,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    if (!enabled) {
        return Optional.empty();
    }

    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(
            recordReader,
            length,
            schema,
            partitionKeys,
            columns,
            hiveStorageTimeZone,
            typeManager));
}
 
Developer: y-lan, Project: presto, Lines: 44, Source: OrcRecordCursorProvider.java
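Two calls do the heavy lifting here: findIncludedColumns builds a boolean mask so the ORC reader materializes only the requested columns, and reader.rows(start, length, include) restricts reading to the split's byte range.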


Note: The org.apache.hadoop.hive.ql.io.orc.OrcSerde class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright belongs to the original authors, and distribution or use should follow each project's License. Please do not reproduce without permission.