This article collects typical usage examples of the Java class org.apache.hive.hcatalog.data.HCatRecord. If you are wondering what HCatRecord is, what it is used for, and how to use it, the curated examples below should help.
The HCatRecord class belongs to the org.apache.hive.hcatalog.data package. Fifteen code examples are shown below, sorted by popularity by default.
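Before diving into the examples, here is a minimal sketch of the HCatRecord API they build on. DefaultHCatRecord is the standard concrete implementation; fields can be addressed by position, or by name when an HCatSchema is available. The two-column layout below is an assumption for illustration only:

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatRecordBasics {
  public static void main(String[] args) throws Exception {
    // A record with two columns, addressed here by position; the examples
    // below mostly use the schema-based form record.set(name, schema, value).
    HCatRecord record = new DefaultHCatRecord(2);
    record.set(0, 42);             // hypothetical int column
    record.set(1, "textfield42");  // hypothetical string column
    System.out.println(record.get(0) + ", " + record.get(1));
  }
}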
Example 1: generateHCatRecords

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

private List<HCatRecord> generateHCatRecords(int numRecords,
    HCatSchema hCatTblSchema, ColumnGenerator... extraCols) throws Exception {
  List<HCatRecord> records = new ArrayList<HCatRecord>();
  List<HCatFieldSchema> hCatTblCols = hCatTblSchema.getFields();
  int size = hCatTblCols.size();
  for (int i = 0; i < numRecords; ++i) {
    DefaultHCatRecord record = new DefaultHCatRecord(size);
    record.set(hCatTblCols.get(0).getName(), hCatTblSchema, i);
    record.set(hCatTblCols.get(1).getName(), hCatTblSchema, "textfield" + i);
    int idx = 0;
    for (int j = 0; j < extraCols.length; ++j) {
      if (extraCols[j].getKeyType() == KeyType.STATIC_KEY) {
        continue;
      }
      record.set(hCatTblCols.get(idx + 2).getName(), hCatTblSchema,
          extraCols[j].getHCatValue(i));
      ++idx;
    }
    records.add(record);
  }
  return records;
}
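ColumnGenerator and KeyType in Example 1 come from the surrounding test harness and are not shown here. Purely as an illustration of the shape such a helper takes (not the actual Sqoop test utility), a hypothetical minimal version:

// Hypothetical sketch of the test helper used in Example 1; the real
// ColumnGenerator in the test harness has more methods and may differ.
enum KeyType { STATIC_KEY, DYNAMIC_KEY }

interface ColumnGenerator {
  KeyType getKeyType();            // STATIC_KEY columns are skipped in Example 1
  Object getHCatValue(int rowNum); // value of the extra column for row rowNum
}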
Example 2: split

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/**
 * Calculates the 'desired' number of splits based on desiredBundleSizeBytes, which is passed as
 * a hint to the native API. Retrieves the actual splits generated by the native API, which could
 * be different from the 'desired' split count calculated using desiredBundleSizeBytes.
 */
@Override
public List<BoundedSource<HCatRecord>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  int desiredSplitCount = 1;
  long estimatedSizeBytes = getEstimatedSizeBytes(options);
  if (desiredBundleSizeBytes > 0 && estimatedSizeBytes > 0) {
    desiredSplitCount = (int) Math.ceil((double) estimatedSizeBytes / desiredBundleSizeBytes);
  }
  ReaderContext readerContext = getReaderContext(desiredSplitCount);
  // process the splits returned by the native API;
  // this could be different from the 'desiredSplitCount' calculated above
  LOG.info(
      "Splitting into bundles of {} bytes: "
          + "estimated size {}, desired split count {}, actual split count {}",
      desiredBundleSizeBytes,
      estimatedSizeBytes,
      desiredSplitCount,
      readerContext.numSplits());
  List<BoundedSource<HCatRecord>> res = new ArrayList<>();
  for (int split = 0; split < readerContext.numSplits(); split++) {
    res.add(new BoundedHCatalogSource(spec.withContext(readerContext).withSplitId(split)));
  }
  return res;
}
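The split method in Example 2 is part of Beam's HCatalogIO source; end users normally go through HCatalogIO.read() rather than constructing BoundedHCatalogSource directly. A hedged sketch of such a pipeline, where the metastore URI, database, and table names are placeholders:

import java.util.HashMap;
import java.util.Map;

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.hcatalog.HCatalogIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
import org.apache.hive.hcatalog.data.HCatRecord;

public class ReadHCatalogTable {
  public static void main(String[] args) {
    Map<String, String> configProperties = new HashMap<>();
    configProperties.put("hive.metastore.uris", "thrift://metastore-host:9083"); // placeholder

    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Each element of the resulting PCollection is an HCatRecord; splitting into
    // bundles is handled internally by the source's split(..) shown in Example 2.
    PCollection<HCatRecord> records =
        p.apply(HCatalogIO.read()
            .withConfigProperties(configProperties)
            .withDatabase("default")   // placeholder database
            .withTable("my_table"));   // placeholder table
    p.run().waitUntilFinish();
  }
}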
Example 3: testReadFromSource

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/** Test of Read using SourceTestUtils.readFromSource(..). */
@Test
@NeedsTestData
public void testReadFromSource() throws Exception {
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);
  List<String> records = new ArrayList<>();
  for (int i = 0; i < context.numSplits(); i++) {
    BoundedHCatalogSource source = new BoundedHCatalogSource(spec.withSplitId(i));
    for (HCatRecord record : SourceTestUtils.readFromSource(source, OPTIONS)) {
      records.add(record.get(0).toString());
    }
  }
  assertThat(records, containsInAnyOrder(getExpectedRecords(TEST_RECORDS_COUNT).toArray()));
}
Example 4: testSourceEqualsSplits

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

/** Test of Read using SourceTestUtils.assertSourcesEqualReferenceSource(..). */
@Test
@NeedsTestData
public void testSourceEqualsSplits() throws Exception {
  final int numRows = 1500;
  final int numSamples = 10;
  final long bytesPerRow = 15;
  ReaderContext context = getReaderContext(getConfigPropertiesAsMap(service.getHiveConf()));
  HCatalogIO.Read spec =
      HCatalogIO.read()
          .withConfigProperties(getConfigPropertiesAsMap(service.getHiveConf()))
          .withContext(context)
          .withTable(TEST_TABLE);
  BoundedHCatalogSource source = new BoundedHCatalogSource(spec);
  List<BoundedSource<HCatRecord>> unSplitSource = source.split(-1, OPTIONS);
  assertEquals(1, unSplitSource.size());
  List<BoundedSource<HCatRecord>> splits =
      source.split(numRows * bytesPerRow / numSamples, OPTIONS);
  assertTrue(splits.size() >= 1);
  SourceTestUtils.assertSourcesEqualReferenceSource(unSplitSource.get(0), splits, OPTIONS);
}
Example 5: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(T key, HCatRecord value, Context context) throws IOException, InterruptedException {
  HCatFieldSchema field;
  Object fieldValue;
  for (int m = 0; m < columnSize; m++) {
    field = schema.get(m);
    fieldValue = value.get(field.getName(), schema);
    if (fieldValue == null)
      fieldValue = "NULL";
    if (counter < 5 && m < 10) {
      System.out.println("Get row " + counter + " column '" + field.getName() + "' value: " + fieldValue);
    }
    if (fieldValue != null)
      getHllc(m).add(Bytes.toBytes(fieldValue.toString()));
  }
  counter++;
}
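counter, columnSize, schema, and getHllc(m) in Example 5 belong to the enclosing mapper (Kylin's column-cardinality sampling job). A hedged sketch of the lazy per-column lookup that getHllc implies, with a hypothetical CardinalityEstimator standing in for Kylin's HyperLogLog counter:

// Hypothetical stand-in for the per-column cardinality counters; only the
// add(byte[]) call used in Example 5 is assumed.
interface CardinalityEstimator {
  void add(byte[] value);
  long getCountEstimate();
}

// One estimator per column index, created on first use.
private final Map<Integer, CardinalityEstimator> hllcMap = new HashMap<>();

private CardinalityEstimator getHllc(int columnIndex) {
  // createEstimator() is assumed to build whatever estimator implementation is in use
  return hllcMap.computeIfAbsent(columnIndex, k -> createEstimator());
}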
Example 6: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
  try {
    int[] flatTableIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
    HCatFieldSchema fieldSchema = null;
    for (int i : factDictCols) {
      outputKey.set((short) i);
      fieldSchema = schema.get(flatTableIndexes[i]);
      Object fieldValue = record.get(fieldSchema.getName(), schema);
      if (fieldValue == null)
        continue;
      byte[] bytes = Bytes.toBytes(fieldValue.toString());
      outputValue.set(bytes, 0, bytes.length);
      context.write(outputKey, outputValue);
    }
  } catch (Exception ex) {
    handleErrorRecord(record, ex);
  }
}
Example 7: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(KEYIN key, HCatRecord record, Context context) throws IOException, InterruptedException {
  HCatFieldSchema fieldSchema = null;
  for (short i = 0; i < columnSize; i++) {
    outputKey.set(i);
    fieldSchema = schema.get(i);
    Object fieldValue = record.get(fieldSchema.getName(), schema);
    if (fieldValue == null)
      continue;
    byte[] bytes = Bytes.toBytes(fieldValue.toString());
    outputValue.set(bytes, 0, bytes.length);
    context.write(outputKey, outputValue);
  }
}
Example 8: call

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public LabeledPoint call(Tuple2<WritableComparable, HCatRecord> tuple) throws Exception {
  HCatRecord record = tuple._2();
  if (record == null) {
    log.info("@@@ Null record");
    return defaultLabeledPoint;
  }
  double[] features = new double[numFeatures];
  for (int i = 0; i < numFeatures; i++) {
    int featurePos = featurePositions[i];
    features[i] = featureValueMappers[i].call(record.get(featurePos));
  }
  double label = featureValueMappers[labelColumnPos].call(record.get(labelColumnPos));
  return new LabeledPoint(label, Vectors.dense(features));
}
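A hedged sketch of how a Function like the one in Example 8 might be wired into a Spark job that reads an HCatalog table as a Hadoop RDD; the class name and the way the configuration and mapper instance are prepared are assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.regression.LabeledPoint;
import scala.Tuple2;

public class HCatRecordsToLabeledPoints {
  public static JavaRDD<LabeledPoint> toLabeledPoints(
      JavaSparkContext sc, Configuration hCatConf,
      Function<Tuple2<WritableComparable, HCatRecord>, LabeledPoint> recordToLabeledPoint) {
    // Read (key, HCatRecord) pairs through HCatInputFormat, then apply the
    // record-to-LabeledPoint mapper from Example 8 to each pair.
    JavaPairRDD<WritableComparable, HCatRecord> rows =
        sc.newAPIHadoopRDD(hCatConf, HCatInputFormat.class,
            WritableComparable.class, HCatRecord.class);
    return rows.map(recordToLabeledPoint);
  }
}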
Example 9: insert

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

private void insert(Map<String, String> partitionSpec, Iterable<HCatRecord> rows) {
  WriteEntity entity = new WriteEntity.Builder()
      .withDatabase(databaseName)
      .withTable(tableName)
      .withPartition(partitionSpec)
      .build();
  try {
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = master.prepareWrite();
    HCatWriter writer = DataTransferFactory.getHCatWriter(context);
    writer.write(rows.iterator());
    master.commit(context);
  } catch (HCatException e) {
    throw new RuntimeException("An error occurred while inserting data to " + databaseName + "." + tableName, e);
  }
}
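A hedged usage sketch of the insert helper from Example 9; the partition column, its value, and the row layout are placeholders:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

// Write one row into the partition dt=2024-01-01 of the configured table.
Map<String, String> partitionSpec = new HashMap<>();
partitionSpec.put("dt", "2024-01-01");  // placeholder partition column and value

DefaultHCatRecord row = new DefaultHCatRecord(2);
row.set(0, 1);                          // placeholder id column
row.set(1, "value");                    // placeholder string column

List<HCatRecord> rows = new ArrayList<>();
rows.add(row);
insert(partitionSpec, rows);            // the helper from Example 9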
Example 10: testPartitionedSimple

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Test
public void testPartitionedSimple() {
  HCatTable table = table().cols(columns(COLUMN_1)).partCols(columns(PARTITION_COLUMN_1));
  Multimap<Map<String, String>, HCatRecord> data = new TableDataBuilder(table)
      .addRow("value", "partition_value")
      .build();
  assertEquals(1, data.size());
  Map<String, String> partitionSpec = new HashMap<>();
  partitionSpec.put(PARTITION_COLUMN_1, "partition_value");
  Collection<HCatRecord> rows = data.get(partitionSpec);
  assertEquals(1, rows.size());
  HCatRecord row = rows.iterator().next();
  assertEquals(Arrays.asList((Object) "value", "partition_value"), row.getAll());
}
Example 11: HCatalogRootParser

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public HCatalogRootParser( final HCatRecord record , final HCatSchema schema ) throws IOException{
  this.record = record;
  fieldIndexMap = new HashMap<String,Integer>();
  converterList = new ArrayList<IHCatalogPrimitiveConverter>();
  schemaList = new ArrayList<HCatFieldSchema>();
  for( int i = 0 ; i < schema.size() ; i++ ){
    HCatFieldSchema fieldSchema = schema.get(i);
    fieldIndexMap.put( fieldSchema.getName() , Integer.valueOf(i) );
    converterList.add( HCatalogPrimitiveConverterFactory.get( fieldSchema ) );
    schemaList.add( schema.get(i) );
  }
}
Example 12: map

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public void map(LongWritable key, HCatRecord hcr, Context context)
    throws IOException, InterruptedException {
  SqoopRecord sqr = helper.convertToSqoopRecord(hcr);
  writeSqoopRecord(sqr);
  context.progress();
}

Contributor: aliyun; project: aliyun-maxcompute-data-collectors; source file: NetezzaExternalTableHCatExportMapper.java
Example 13: convertToSqoopRecord

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public SqoopRecord convertToSqoopRecord(HCatRecord hcr)
    throws IOException {
  Text key = new Text();
  for (Map.Entry<String, Object> e : sqoopRecord.getFieldMap().entrySet()) {
    String colName = e.getKey();
    String hfn = colName.toLowerCase();
    key.set(hfn);
    Object hCatVal = hcr.get(hfn, hCatFullTableSchema);
    if (!isOdps) {
      String javaColType = colTypesJava.get(key).toString();
      int sqlType = ((IntWritable) colTypesSql.get(key)).get();
      HCatFieldSchema field = hCatFullTableSchema.get(hfn);
      HCatFieldSchema.Type fieldType = field.getType();
      String hCatTypeString = field.getTypeString();
      Object sqlVal = convertToSqoop(hCatVal, fieldType, javaColType, hCatTypeString);
      if (debugHCatExportMapper) {
        LOG.debug("hCatVal " + hCatVal + " of type "
            + (hCatVal == null ? null : hCatVal.getClass().getName()) + ",sqlVal " + sqlVal
            + " of type " + (sqlVal == null ? null : sqlVal.getClass().getName()) + ",java type "
            + javaColType + ", sql type = " + SqoopHCatUtilities.sqlTypeString(sqlType));
      }
      sqoopRecord.setField(colName, sqlVal);
    } else {
      sqoopRecord.setField(colName, hCatVal == null ? null : hCatVal.toString());
    }
  }
  return sqoopRecord;
}
Example 14: createRecordReader

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

@Override
public RecordReader<WritableComparable, HCatRecord>
    createRecordReader(InputSplit split,
                       TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a SqoopHCatRecordReader");
  return new SqoopHCatRecordReader(split, taskContext, this);
}
Example 15: createHCatRecordReader

import org.apache.hive.hcatalog.data.HCatRecord; // import the required package/class

public RecordReader<WritableComparable, HCatRecord>
    createHCatRecordReader(InputSplit split,
                           TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
  LOG.debug("Creating a base HCatRecordReader");
  return super.createRecordReader(split, taskContext);
}