This article collects typical usage examples of the Java class org.apache.hadoop.hive.ql.io.orc.OrcSerde. If you are wondering what the OrcSerde class is for, how to use it, or want to see it in real code, the curated examples below may help.
The OrcSerde class belongs to the org.apache.hadoop.hive.ql.io.orc package. Thirteen code examples of the class are shown below, ordered by popularity by default.
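Before the examples, here is a minimal, self-contained sketch of the initialization pattern most of them share: the SerDe is configured through a Properties object carrying the column names and column types (the serdeConstants keys resolve to "columns" and "columns.types", the same keys Example 8 sets directly). The class name OrcSerdeQuickStart and the two-column schema are made up purely for illustration.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde.serdeConstants;

public class OrcSerdeQuickStart {
    public static OrcSerde newSerde( final Configuration config ) {
        Properties table = new Properties();
        // serdeConstants.LIST_COLUMNS is "columns", LIST_COLUMN_TYPES is "columns.types"
        table.setProperty( serdeConstants.LIST_COLUMNS , "id,msg" );
        table.setProperty( serdeConstants.LIST_COLUMN_TYPES , "int,string" );
        OrcSerde serde = new OrcSerde();
        // In the Hive versions used by these examples, initialize declares no checked
        // exception (see Examples 1 and 12, which call it without a try/catch).
        serde.initialize( config , table );
        return serde;
    }
}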
Example 1: getFromTypeInfo

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public static OrcSerde getFromTypeInfo( final Configuration config , final TypeInfo typeInfo ) throws IOException {
    ObjectInspector objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( typeInfo );
    if( !( typeInfo instanceof StructTypeInfo ) ){
        throw new IOException( "Input type info is not StructTypeInfo : " + typeInfo.toString() );
    }

    // Build comma-separated column-name and column-type lists from the struct fields.
    String columnsName = "";
    String columnsType = "";
    List<TypeInfo> typeInfoList = ( (StructTypeInfo)typeInfo ).getAllStructFieldTypeInfos();
    List<StructField> structField = (List<StructField>)( ( (StructObjectInspector)objectInspector ).getAllStructFieldRefs() );
    for( int i = 0 ; i < structField.size() ; i++ ){
        if( ! columnsName.isEmpty() ){
            columnsName = columnsName.concat( "," );
            columnsType = columnsType.concat( "," );
        }
        columnsName = columnsName.concat( structField.get(i).getFieldName() );
        columnsType = columnsType.concat( typeInfoList.get(i).toString() );
    }

    // Configure the SerDe through the standard table properties.
    OrcSerde serde = new OrcSerde();
    Properties table = new Properties();
    table.setProperty( serdeConstants.LIST_COLUMNS , columnsName );
    table.setProperty( serdeConstants.LIST_COLUMN_TYPES , columnsType );
    serde.initialize( config , table );
    return serde;
}
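A hypothetical caller of the helper above might build the TypeInfo from a Hive type string (the struct schema here is invented for illustration, and the usual Hive imports are assumed); Example 10 below wraps exactly this pattern.

TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( "struct<id:int,msg:string>" );
OrcSerde serde = getFromTypeInfo( new Configuration() , typeInfo );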
Example 2: createMultiStripeFile

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);

    @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);

    // Write the values 0, 3, 6, ..., 297, flushing every 20 rows to force multiple stripes.
    for (int i = 0; i < 300; i += 3) {
        if ((i > 0) && (i % 60 == 0)) {
            flushWriter(writer);
        }
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }

    writer.close(false);
}
Example 3: createSequentialFile

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

private static void createSequentialFile(File file, int count)
        throws IOException, ReflectiveOperationException, SerDeException
{
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);

    @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);

    // Write the values 0 .. count-1 into a single long column.
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }

    writer.close(false);
}
Example 4: configure

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

@Override
public void configure() {
    super.configure();

    this.orcSerde = new OrcSerde();
    this.outputFormat = new OrcOutputFormat();

    this.columnTypeList = Lists.newArrayList();
    for(String columnType : columnTypes) {
        this.columnTypeList.add(HdfsUtil.columnTypeToObjectInspetor(columnType));
    }
    this.inspector = ObjectInspectorFactory
            .getStandardStructObjectInspector(this.columnNames, this.columnTypeList);

    // Map the configured compression name to a Hadoop codec class.
    Class<? extends CompressionCodec> codecClass = null;
    if(CompressEnum.NONE.name().equalsIgnoreCase(compress)){
        codecClass = null;
    } else if(CompressEnum.GZIP.name().equalsIgnoreCase(compress)){
        codecClass = org.apache.hadoop.io.compress.GzipCodec.class;
    } else if (CompressEnum.BZIP2.name().equalsIgnoreCase(compress)) {
        codecClass = org.apache.hadoop.io.compress.BZip2Codec.class;
    } else if(CompressEnum.SNAPPY.name().equalsIgnoreCase(compress)) {
        // TODO: enable once the requirement is finalized; the user must install SnappyCodec
        codecClass = org.apache.hadoop.io.compress.SnappyCodec.class;
    } else {
        throw new IllegalArgumentException("Unsupported compress format: " + compress);
    }

    if(codecClass != null){
        this.outputFormat.setOutputCompressorClass(jobConf, codecClass);
    }
}
Example 5: getObjectInspector

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public static ObjectInspector getObjectInspector( final OrcSerde serde ) throws IOException {
    try{
        return serde.getObjectInspector();
    }catch( SerDeException e ){
        throw new IOException( e );
    }
}
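For illustration only, the helpers from Examples 1, 5 and 10 (below) can be chained to go from a schema string to an ObjectInspector; the schema string is again made up.

OrcSerde serde = getFromString( new Configuration() , "struct<id:int,msg:string>" );
ObjectInspector inspector = getObjectInspector( serde );   // SerDeException is rewrapped as IOException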
Example 6: createTable

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public void createTable() throws TException {
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveLocalMetaStore.getHiveConf());

    LOG.info("HIVE: Dropping hive table: " + propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    hiveClient.dropTable(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY),
            propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY),
            true, true);

    // Define the cols
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("id", Constants.INT_TYPE_NAME, ""));
    cols.add(new FieldSchema("msg", Constants.STRING_TYPE_NAME, ""));

    // Values for the StorageDescriptor
    String location = new File(propertyParser.getProperty(
            ConfigVars.HIVE_TEST_TABLE_LOCATION_KEY)).getAbsolutePath();
    String inputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat";
    String outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat";
    int numBuckets = 16;
    Map<String,String> orcProps = new HashMap<String, String>();
    orcProps.put("orc.compress", "NONE");
    SerDeInfo serDeInfo = new SerDeInfo(OrcSerde.class.getSimpleName(), OrcSerde.class.getName(), orcProps);
    List<String> bucketCols = new ArrayList<String>();
    bucketCols.add("id");

    // Build the StorageDescriptor
    StorageDescriptor sd = new StorageDescriptor();
    sd.setCols(cols);
    sd.setLocation(location);
    sd.setInputFormat(inputFormat);
    sd.setOutputFormat(outputFormat);
    sd.setNumBuckets(numBuckets);
    sd.setSerdeInfo(serDeInfo);
    sd.setBucketCols(bucketCols);
    sd.setSortCols(new ArrayList<Order>());
    sd.setParameters(new HashMap<String, String>());

    // Define the table
    Table tbl = new Table();
    tbl.setDbName(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY));
    tbl.setTableName(propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    tbl.setSd(sd);
    tbl.setOwner(System.getProperty("user.name"));
    tbl.setParameters(new HashMap<String, String>());
    tbl.setViewOriginalText("");
    tbl.setViewExpandedText("");
    tbl.setTableType(TableType.MANAGED_TABLE.name());
    List<FieldSchema> partitions = new ArrayList<FieldSchema>();
    partitions.add(new FieldSchema("dt", Constants.STRING_TYPE_NAME, ""));
    tbl.setPartitionKeys(partitions);

    // Create the table
    hiveClient.createTable(tbl);

    // Describe the table
    Table createdTable = hiveClient.getTable(propertyParser.getProperty(ConfigVars.HIVE_BOLT_DATABASE_KEY),
            propertyParser.getProperty(ConfigVars.HIVE_BOLT_TABLE_KEY));
    LOG.info("HIVE: Created Table: " + createdTable.toString());
}
Example 7: createPageSource

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

@Override
public Optional<? extends ConnectorPageSource> createPageSource(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone)
{
    if (!isOptimizedReaderEnabled(session)) {
        return Optional.empty();
    }

    // Only handle splits whose deserializer is OrcSerde.
    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    return Optional.of(createOrcPageSource(
            new OrcMetadataReader(),
            configuration,
            path,
            start,
            length,
            columns,
            partitionKeys,
            effectivePredicate,
            hiveStorageTimeZone,
            typeManager,
            getOrcMaxMergeDistance(session),
            getOrcMaxBufferSize(session),
            getOrcStreamBufferSize(session)));
}
Example 8: TestPreparer

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public TestPreparer(String tempFilePath)
        throws Exception
{
    OrcSerde serde = new OrcSerde();

    // Describe the test columns through the standard table properties.
    schema = new Properties();
    schema.setProperty("columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    schema.setProperty("columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    schema.setProperty(FILE_INPUT_FORMAT, OrcInputFormat.class.getName());
    schema.setProperty(SERIALIZATION_LIB, serde.getClass().getName());

    partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue()))
            .collect(toList());

    ImmutableList.Builder<HiveColumnHandle> columnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
    int nextHiveColumnIndex = 0;
    for (int i = 0; i < testColumns.size(); i++) {
        TestColumn testColumn = testColumns.get(i);
        int columnIndex = testColumn.isPartitionKey() ? -1 : nextHiveColumnIndex++;

        ObjectInspector inspector = testColumn.getObjectInspector();
        HiveType hiveType = HiveType.valueOf(inspector.getTypeName());
        Type type = hiveType.getType(TYPE_MANAGER);

        columnsBuilder.add(new HiveColumnHandle("client_id", testColumn.getName(), hiveType, type.getTypeSignature(), columnIndex, testColumn.isPartitionKey()));
        typesBuilder.add(type);
    }
    columns = columnsBuilder.build();
    types = typesBuilder.build();

    fileSplit = createTestFile(tempFilePath, new OrcOutputFormat(), serde, null, testColumns, NUM_ROWS);
}
Example 9: getDataFormat

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public static String getDataFormat(StorageDescriptor descriptor) {
    Preconditions.checkNotNull(descriptor);

    String serde = descriptor.getSerdeInfo().getSerializationLib();
    String inputFormat = descriptor.getInputFormat();

    if (LazySimpleSerDe.class.getName().equals(serde)) {
        if (TextInputFormat.class.getName().equals(inputFormat)) {
            return BuiltinStorages.TEXT;
        } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
            return BuiltinStorages.SEQUENCE_FILE;
        } else {
            throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
        }
    } else if (LazyBinarySerDe.class.getName().equals(serde)) {
        if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
            return BuiltinStorages.SEQUENCE_FILE;
        } else {
            throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
        }
    } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
        if (RCFileInputFormat.class.getName().equals(inputFormat)) {
            return BuiltinStorages.RCFILE;
        } else {
            throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
        }
    } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
        return BuiltinStorages.PARQUET;
    } else if (AvroSerDe.class.getName().equals(serde)) {
        return BuiltinStorages.AVRO;
    } else if (OrcSerde.class.getName().equals(serde)) {
        return BuiltinStorages.ORC;
    } else if (RegexSerDe.class.getName().equals(serde)) {
        return BuiltinStorages.REGEX;
    } else {
        throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
}
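A hedged sketch of how getDataFormat might be exercised for an ORC table: the descriptor values are illustrative; SerDeInfo and StorageDescriptor come from the Hive metastore API, as in Example 6.

StorageDescriptor sd = new StorageDescriptor();
sd.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat");
sd.setSerdeInfo(new SerDeInfo("orc", OrcSerde.class.getName(), new HashMap<String, String>()));
String dataFormat = getDataFormat(sd);   // resolves to BuiltinStorages.ORC via the branch above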
Example 10: getFromString

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public static OrcSerde getFromString( final Configuration config , final String schemaString ) throws IOException {
    return getFromTypeInfo( config , TypeInfoUtils.getTypeInfoFromTypeString( schemaString ) );
}
Example 11: getFromObjectInspector

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

public static OrcSerde getFromObjectInspector( final Configuration config , final ObjectInspector ois ) throws IOException {
    return getFromTypeInfo( config , TypeInfoUtils.getTypeInfoFromObjectInspector( ois ) );
}
Example 12: createSerializer

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

private static OrcSerde createSerializer(Configuration conf, Properties properties)
{
    OrcSerde serde = new OrcSerde();
    serde.initialize(conf, properties);
    return serde;
}
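Putting Example 12 together with the serialize pattern from Examples 2 and 3 might look like the sketch below. The single "test" column of type bigint mirrors those examples; everything else (the literal property keys, the value 42L) is illustrative, and the usual Hive serde2 imports are assumed.

Properties properties = new Properties();
properties.setProperty("columns", "test");
properties.setProperty("columns.types", "bigint");
OrcSerde serde = createSerializer(new Configuration(), properties);

StandardStructObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("test"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaLongObjectInspector));
Object row = inspector.create();
inspector.setStructFieldData(row, inspector.getAllStructFieldRefs().get(0), 42L);
Writable record = serde.serialize(row, inspector);   // ready to hand to an ORC RecordWriter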
Example 13: createHiveRecordCursor

import org.apache.hadoop.hive.ql.io.orc.OrcSerde; // import the required package/class

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(
        String clientId,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    if (!enabled) {
        return Optional.empty();
    }

    // Only handle splits whose deserializer is OrcSerde.
    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(
            recordReader,
            length,
            schema,
            partitionKeys,
            columns,
            hiveStorageTimeZone,
            typeManager));
}