本文整理汇总了Java中org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema类的典型用法代码示例。如果您正苦于以下问题:Java FieldSchema类的具体用法?Java FieldSchema怎么用?Java FieldSchema使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
FieldSchema类属于org.apache.pig.impl.logicalLayer.schema.Schema包,在下文中一共展示了FieldSchema类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: addField
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Recursively flattens a Pig schema into a list of dotted Elasticsearch field names.
 * <p>
 * Aliased fields extend the current dotted path; anonymous fields inherit the
 * parent path. Fields with an inner schema (tuples/bags/maps) are recursed into;
 * leaf fields are translated through the {@link FieldAlias} mapping and collected.
 *
 * @param schema      Pig schema to walk
 * @param fields      accumulator for the resulting ES field names
 * @param fa          alias mapping from Pig names to ES names
 * @param currentNode dotted path of the enclosing field, or null at the root
 */
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) {
    for (FieldSchema field : schema.getFields()) {
        String node;
        if (field.alias != null) {
            // alias available - append it (dot-separated) to the current path
            node = fa.toES(field.alias);
            node = (currentNode != null ? currentNode + "." + node : node);
        }
        else {
            // anonymous field - keep the parent's path unchanged
            node = currentNode;
        }
        // && field.type != DataType.TUPLE
        if (field.schema != null) {
            // complex field with an inner schema - recurse into it
            addField(field.schema, fields, fa, node);
        }
        else {
            if (!StringUtils.hasText(node)) {
                // fixed: message previously ran into the schema text ("...in schema{...}")
                LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema " + schema);
            }
            if (node != null) {
                fields.add(fa.toES(node));
            }
        }
    }
}
示例2: buildElNinoInputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Builds the input schema for the El Nino dataset: three chararray identifier
 * columns followed by seven double-valued measurement columns.
 *
 * @return the assembled Pig {@link Schema}
 * @throws FrontendException if schema construction fails
 */
private Schema buildElNinoInputSchema() throws FrontendException {
    List<FieldSchema> columns = new ArrayList<FieldSchema>();
    // Identifier columns (chararray).
    String[] textColumns = {"buoy_day_ID", "buoy", "day"};
    for (String name : textColumns) {
        columns.add(new Schema.FieldSchema(name, DataType.CHARARRAY));
    }
    // Measurement columns (double).
    String[] doubleColumns = {"latitude", "longitude", "zon_winds", "mer_winds",
            "humidity", "airtemp", "s_s_temp"};
    for (String name : doubleColumns) {
        columns.add(new Schema.FieldSchema(name, DataType.DOUBLE));
    }
    return new Schema(columns);
}
示例3: buildAuditInputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Builds the input schema for the audit dataset; column names are paired
 * positionally with their Pig data types.
 *
 * @return the assembled Pig {@link Schema}
 * @throws FrontendException if schema construction fails
 */
private Schema buildAuditInputSchema() throws FrontendException {
    String[] names = {"id", "age", "employment", "education", "marital",
            "occupation", "income", "gender", "deductions", "hours",
            "ignore_accounts", "risk_adjustment", "target_adjusted"};
    byte[] types = {DataType.LONG, DataType.INTEGER, DataType.CHARARRAY,
            DataType.CHARARRAY, DataType.CHARARRAY, DataType.CHARARRAY,
            DataType.DOUBLE, DataType.CHARARRAY, DataType.DOUBLE,
            DataType.INTEGER, DataType.CHARARRAY, DataType.INTEGER,
            DataType.INTEGER};
    List<FieldSchema> columns = new ArrayList<FieldSchema>(names.length);
    for (int i = 0; i < names.length; i++) {
        columns.add(new Schema.FieldSchema(names[i], types[i]));
    }
    return new Schema(columns);
}
示例4: testExecSingleIdField
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
@Test
public void testExecSingleIdField() throws Exception {
// given
TupleFactory tupleFactory = TupleFactory.getInstance();
Schema schema = new Schema();
schema.add(new FieldSchema(null, DataType.CHARARRAY));
schema.add(new FieldSchema(null, DataType.CHARARRAY));
schema.add(new FieldSchema("id", DataType.CHARARRAY));
idReplacer.setInputSchema(schema);
String idFieldName = "id";
String newId = "updatedId";
String oldId = "oldId";
// execute
Tuple result = idReplacer.exec(tupleFactory.newTuple(Lists.newArrayList(
idFieldName, newId, oldId)));
assertNotNull(result);
assertEquals(1, result.getAll().size());
assertEquals(newId, result.get(0));
}
示例5: testOutputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
@Test
public void testOutputSchema() throws Exception {
    // given: two anonymous chararray columns followed by named "id" and "dedup" fields
    Schema input = new Schema();
    input.add(new FieldSchema(null, DataType.CHARARRAY));
    input.add(new FieldSchema(null, DataType.CHARARRAY));
    FieldSchema idField = new FieldSchema("id", DataType.CHARARRAY);
    input.add(idField);
    FieldSchema dedupField = new FieldSchema("dedup", DataType.BOOLEAN);
    input.add(dedupField);
    // execute
    Schema output = idReplacer.outputSchema(input);
    // assert: one wrapper field whose inner schema preserves id and dedup in order
    assertNotNull(output);
    assertEquals(1, output.getFields().size());
    Schema inner = output.getField(0).schema;
    assertEquals(idField, inner.getField(0));
    assertEquals(dedupField, inner.getField(1));
}
示例6: testMergeDifferentSize2
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Merging two schemas of different sizes (3 vs 4 fields) with
 * allowDifferentSizeMerge=false must raise {@link SchemaMergeException}.
 */
@Test(expected = SchemaMergeException.class)
public void testMergeDifferentSize2() throws Throwable {
    // First schema: three fields.
    List<FieldSchema> list1 = new ArrayList<FieldSchema>();
    list1.add(new FieldSchema("1a", DataType.BYTEARRAY));
    list1.add(new FieldSchema("1b", DataType.BYTEARRAY));
    list1.add(new FieldSchema("1c", DataType.LONG));
    // Second schema: four fields, so sizes differ.
    List<FieldSchema> list2 = new ArrayList<FieldSchema>();
    list2.add(new FieldSchema("2a", DataType.BYTEARRAY));
    list2.add(new FieldSchema("2b", DataType.BYTEARRAY));
    list2.add(new FieldSchema("2c", DataType.INTEGER));
    list2.add(new FieldSchema("2d", DataType.MAP));
    Schema schema1 = new Schema(list1);
    Schema schema2 = new Schema(list2);
    // Merge is expected to throw; result deliberately discarded
    // (was previously assigned to an unused local).
    Schema.mergeSchema(schema1,
                       schema2,
                       true,
                       false,
                       false);
}
示例7: outputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Declares the UDF output: a bag named "ui_recs" of anonymous tuples with
 * eight recommendation columns.
 *
 * @param input input schema (unused)
 * @return bag schema wrapping the recommendation tuple layout
 */
public Schema outputSchema(Schema input) {
    try {
        // Inner tuple layout for a single recommendation row.
        String[] names = {"user", "item", "weight", "reason",
                "user_link", "item_link", "diversity_adj_weight", "rank"};
        byte[] types = {DataType.CHARARRAY, DataType.CHARARRAY, DataType.FLOAT,
                DataType.CHARARRAY, DataType.FLOAT, DataType.FLOAT,
                DataType.FLOAT, DataType.INTEGER};
        ArrayList<FieldSchema> tupleFields = new ArrayList<FieldSchema>(names.length);
        for (int i = 0; i < names.length; i++) {
            tupleFields.add(new Schema.FieldSchema(names[i], types[i]));
        }
        // Wrap: bag "ui_recs" -> anonymous tuple -> the eight columns above.
        Schema tupleSchema = new Schema(
                new Schema.FieldSchema(null, new Schema(tupleFields), DataType.TUPLE));
        return new Schema(new Schema.FieldSchema("ui_recs", tupleSchema, DataType.BAG));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
示例8: readColumnarStruct
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Deserializes an RCFile row into a ColumnarStruct using a ColumnarSerDe
 * configured from the textual Hive schema.
 *
 * @param buff   serialized row bytes
 * @param schema textual schema ("name type name type ...") to parse
 * @return the deserialized columnar row
 * @throws SerDeException if the serde cannot be initialized or deserialization fails
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    // Split the schema string into column names and their Hive types.
    Pattern colPattern = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    List<String> colTypes = HiveRCSchemaUtil.parseSchemaTypes(schema);
    List<String> colNames = HiveRCSchemaUtil.parseSchema(colPattern, schema);
    // Map each Hive type onto the matching Pig field (positional pairing).
    List<FieldSchema> pigFields = new ArrayList<FieldSchema>(colNames.size());
    for (int i = 0; i < colNames.size(); i++) {
        pigFields.add(new FieldSchema(colNames.get(i),
                HiveRCSchemaUtil.findPigDataType(colTypes.get(i))));
    }
    // Configure the serde with the column names/types, then deserialize.
    Properties tableProps = new Properties();
    tableProps.setProperty(Constants.LIST_COLUMNS,
            HiveRCSchemaUtil.listToString(colNames));
    tableProps.setProperty(Constants.LIST_COLUMN_TYPES,
            HiveRCSchemaUtil.listToString(colTypes));
    ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(new HiveConf(conf, SessionState.class), tableProps);
    return (ColumnarStruct) serde.deserialize(buff);
}
示例9: outputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
@Override
public Schema outputSchema(Schema input) {
Schema bagTupleSchema = new Schema();
Set<String> aliasSet = new HashSet<String>();
for (FieldSchema schema : input.getFields()) { //Each field should be a bag
if (schema.schema == null) throw new RuntimeException("Inner bag schemas are null");
for (FieldSchema innerBagTuple : schema.schema.getFields()) {
for (FieldSchema tupleField : innerBagTuple.schema.getFields()) {
if (!aliasSet.add(tupleField.alias)) {
throw new RuntimeException("Duplicate field alias specified");
}
bagTupleSchema.add(tupleField);
}
}
}
try {
return new Schema(new FieldSchema("zipped",bagTupleSchema, DataType.BAG));
} catch (FrontendException e) {
throw new RuntimeException(e);
}
}
示例10: outputSchema
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Declares the UDF output: a bag of anonymous (chararray, integer, float)
 * tuples.
 *
 * @param input input schema (unused)
 * @return the bag schema, or null if construction fails
 */
@Override
public Schema outputSchema(Schema input) {
    Schema bagSchema = new Schema();
    bagSchema.add(new FieldSchema(null, DataType.CHARARRAY));
    bagSchema.add(new FieldSchema(null, DataType.INTEGER));
    bagSchema.add(new FieldSchema(null, DataType.FLOAT));
    try {
        return new Schema(new Schema.FieldSchema(null, bagSchema, DataType.BAG));
    } catch (FrontendException fee) {
        // Match existing behavior: signal failure with a null schema.
        return null;
    }
}
示例11: getPredicateFields
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Returns the schema fields eligible for predicate pushdown, or null when
 * pushdown is disabled in the job configuration.
 *
 * @param s   location string (unused)
 * @param job job carrying the configuration flag
 * @return aliases of filterable primitive fields, or null if pushdown is off
 * @throws IOException declared by the interface contract
 */
@Override
public List<String> getPredicateFields(String s, Job job) throws IOException {
    // Pushdown must be explicitly enabled via configuration.
    if (!job.getConfiguration().getBoolean(ENABLE_PREDICATE_FILTER_PUSHDOWN, DEFAULT_PREDICATE_PUSHDOWN_ENABLED)) {
        return null;
    }
    List<String> pushable = new ArrayList<String>();
    for (FieldSchema field : schema.getFields()) {
        // Only simple comparable types are filterable; BYTEARRAY, TUPLE,
        // MAP, BAG, DATETIME, BIGINTEGER, BIGDECIMAL are skipped.
        byte t = field.type;
        boolean filterable = t == DataType.BOOLEAN || t == DataType.INTEGER
                || t == DataType.LONG || t == DataType.FLOAT
                || t == DataType.DOUBLE || t == DataType.CHARARRAY;
        if (filterable) {
            pushable.add(field.alias);
        }
    }
    return pushable;
}
示例12: convertFields
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Converts a list of Parquet types into a Pig schema. Repeated Parquet
 * fields are wrapped in a Pig bag; all others map one-to-one.
 *
 * @param parquetFields Parquet field types to convert
 * @return Pig schema with one field per Parquet type
 */
private Schema convertFields(List<Type> parquetFields) {
    List<FieldSchema> pigFields = new ArrayList<Schema.FieldSchema>();
    for (Type parquetType : parquetFields) {
        try {
            FieldSchema converted = getFieldSchema(parquetType);
            if (!parquetType.isRepetition(Repetition.REPEATED)) {
                pigFields.add(converted);
            } else {
                // Repeated fields become bags wrapping the converted field.
                Schema bagSchema = new Schema(Arrays.asList(converted));
                pigFields.add(new FieldSchema(null, bagSchema, DataType.BAG));
            }
        } catch (FrontendException fe) {
            throw new SchemaConversionException("can't convert "+ parquetType, fe);
        }
    }
    return new Schema(pigFields);
}
示例13: convertMap
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
/**
 * Converts a Pig map field into a Parquet string-keyed map group.
 *
 * @param alias name used for the resulting Parquet group
 * @param fieldSchema Pig map field whose inner schema must contain exactly
 *        one field describing the map's value type
 * @return an optional group containing one repeated group field (key, value)
 * @throws SchemaConversionException if the inner schema is missing, does not
 *         contain exactly one field, or cannot be read
 *         (note: the previous doc claimed FrontendException, which this
 *         method neither declares nor throws)
 */
private GroupType convertMap(String alias, FieldSchema fieldSchema) {
Schema innerSchema = fieldSchema.schema;
// A Pig map's inner schema must describe exactly the value type.
if (innerSchema == null || innerSchema.size() != 1) {
throw new SchemaConversionException("Invalid map Schema, schema should contain exactly one field: " + fieldSchema);
}
FieldSchema innerField = null;
try {
innerField = innerSchema.getField(0);
} catch (FrontendException fe) {
// getField(0) should not fail after the size check above; wrap if it does.
throw new SchemaConversionException("Invalid map schema, cannot infer innerschema: ", fe);
}
// Build the Parquet (key, value) repeated group; keys are always strings.
Type convertedValue = convertWithName(innerField, "value");
return ConversionPatterns.stringKeyMapType(Repetition.OPTIONAL, alias, name(innerField.alias, "map"),
convertedValue);
}
示例14: getBagTest
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
@Test
public void getBagTest() throws Exception
{
    ReportBuilder udf = new ReportBuilder();
    udf.setUDFContextSignature("test");
    // Tuple layout: (msisdn:long, ts:int, center_lon:double, center_lat:double)
    List<Schema.FieldSchema> tupleFields = new ArrayList<Schema.FieldSchema>();
    tupleFields.add(new Schema.FieldSchema("msisdn", DataType.LONG));
    tupleFields.add(new Schema.FieldSchema("ts", DataType.INTEGER));
    tupleFields.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
    tupleFields.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
    Schema schemaBag = new Schema(new Schema.FieldSchema(
            ReportBuilder.ORDERED_ROUTES, new Schema(tupleFields), DataType.BAG));
    udf.outputSchema(schemaBag);
    // A single-row bag wrapped in the input tuple should come back unchanged.
    DataBag routes = BagFactory.getInstance().newDefaultBag();
    routes.add(TupleFactory.getInstance().newTuple(
            Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
    Tuple input = TupleFactory.getInstance().newTuple();
    input.append(routes);
    DataBag outputBag = udf.exec(input);
    Assert.assertEquals(routes, outputBag);
}
示例15: simpleBagCompare
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; //导入依赖的package包/类
@Test
public void simpleBagCompare() throws Exception {
    TupleDiff diff = new TupleDiff();
    // Two top-level tuples, each wrapping a bag; the first bags differ (TUPLE1 vs TUPLE2).
    Tuple pair = tf.newTuple(Arrays.asList(
            tf.newTuple(bf.newDefaultBag(Arrays.asList(TUPLE1, TUPLE3))),
            tf.newTuple(bf.newDefaultBag(Arrays.asList(TUPLE2, TUPLE3)))));
    Assert.assertEquals(diff.exec(pair), "changed 0", "Should identify changed bag by position (0)");
    // With an input schema attached, the diff should report the field alias instead.
    Schema innerSchema = new Schema(Arrays.asList(new FieldSchema("f1", DataType.BAG)));
    diff.setInputSchema(new Schema(Arrays.asList(
            new FieldSchema("t1", innerSchema), new FieldSchema("t2", innerSchema))));
    Assert.assertEquals(diff.exec(pair), "changed f1", "Should identify changed bag f1 by name");
    // TUPLE1COPY equals TUPLE1, so equal bags must produce no diff at all.
    pair = tf.newTuple(Arrays.asList(
            tf.newTuple(bf.newDefaultBag(Arrays.asList(TUPLE1, TUPLE3))),
            tf.newTuple(bf.newDefaultBag(Arrays.asList(TUPLE1COPY, TUPLE3)))));
    Assert.assertEquals(diff.exec(pair), null, "Equal bags shouldn't generate a result");
}