本文整理汇总了Java中org.apache.pig.impl.util.Utils类的典型用法代码示例。如果您正苦于以下问题:Java Utils类的具体用法?Java Utils怎么用?Java Utils使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
Utils类属于org.apache.pig.impl.util包,在下文中一共展示了Utils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getEscapedSchemaFromString
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Parses a comma-separated schema string after escaping each of its pieces.
 *
 * <p>Carriage returns and line feeds are removed, the remaining text is split on
 * {@code ','}, every piece is passed through {@code escapeFieldSchemaStr}, and the
 * pieces are re-joined with commas before being handed to
 * {@code Utils.getSchemaFromString}.
 *
 * @param schemaStr the raw schema string; must not be {@code null}
 * @return the schema parsed from the escaped string
 * @throws IllegalArgumentException if the escaped string cannot be parsed; the
 *         underlying {@code ParserException} is attached as the cause
 */
public static Schema getEscapedSchemaFromString(String schemaStr) {
    schemaStr = schemaStr.replaceAll("[\\r\\n]", "");
    String[] fieldSchemaStrs = schemaStr.split(",");
    StringBuilder escapedSchemaBuilder = new StringBuilder();
    for (int i = 0; i < fieldSchemaStrs.length; i++) {
        escapedSchemaBuilder.append(escapeFieldSchemaStr(fieldSchemaStrs[i]));
        if (i != fieldSchemaStrs.length - 1) {
            escapedSchemaBuilder.append(",");
        }
    }
    try {
        return Utils.getSchemaFromString(escapedSchemaBuilder.toString());
    } catch (ParserException pe) {
        // Attach the parser exception as the cause so its stack trace is not lost.
        throw new IllegalArgumentException("Invalid schema format: " + pe.getMessage(), pe);
    }
}
示例2: outputSchema
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Computes the output schema for the given input schema.
 *
 * <p>When no schema function is set, the {@code schema} field is returned unchanged.
 * Otherwise the string form of {@code input}, with one leading '{' and one trailing '}'
 * removed, is placed in a single-field tuple, passed to the schema function, and the
 * string it returns is parsed with {@code Utils.getSchemaFromString}.
 *
 * @param input the input schema
 * @return the parsed schema, or the {@code schema} field when no schema function is set
 * @throws RuntimeException wrapping a {@code ParserException} or {@code IOException}
 *         raised while executing the schema function or parsing its result
 */
@Override
public Schema outputSchema(Schema input) {
    if (this.schemaFunction == null) {
        return this.schema;
    }
    try {
        Tuple args = TupleFactory.getInstance().newTuple(1);
        // Strip enclosing '{}' from schema
        String bareSchema = input.toString().replaceAll("^\\{", "").replaceAll("\\}$", "");
        args.set(0, bareSchema);
        String schemaText = (String) this.schemaFunction.exec(args);
        return Utils.getSchemaFromString(schemaText);
    } catch (ParserException pe) {
        throw new RuntimeException(pe);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
示例3: outputSchema
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Builds the output schema as a bag named {@code docs} of tuples that start with an
 * {@code id:chararray} column followed by one column per entry in {@code fields}.
 *
 * <p>The schema is assembled as text and parsed with {@code Utils.getSchemaFromString}.
 * The {@code input} argument is not read.
 *
 * @param input the input schema (unused)
 * @return the parsed schema
 * @throws IllegalStateException wrapping any checked exception thrown while parsing;
 *         runtime exceptions are rethrown unchanged
 */
@Override
public Schema outputSchema(Schema input) {
    StringBuilder columns = new StringBuilder();
    for (int i = 0; i < fields.length; i++) {
        // NOTE(review): toPigType is given the field's name here - confirm that is intended.
        columns.append(",")
                .append(fields[i].name)
                .append(":")
                .append(toPigType(fields[i].name));
    }
    String serializedSchema = "docs: bag {t: tuple(id:chararray"+columns.toString()+")}";
    try {
        return Utils.getSchemaFromString(serializedSchema);
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
示例4: registerScalarScript
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Registers a short script on a new {@code PigServer} and asserts that the schema
 * dumped for alias {@code B} equals the schema parsed from {@code expectedSchemaStr}.
 *
 * @param useScalar when {@code true}, alias {@code C} is redefined inside the nested
 *        FOREACH block as a filter over {@code A}; when {@code false}, nothing is added
 * @param expectedSchemaStr schema string the dumped schema of {@code B} must equal
 * @throws IOException propagated from the Pig calls
 */
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("A = load 'adata' AS (a: int, b: int);");
    //scalar
    pig.registerQuery("C = FOREACH A GENERATE *;");

    String overrideScalar;
    if (useScalar) {
        overrideScalar = "C = FILTER A BY b % 2 == 0; ";
    } else {
        overrideScalar = "";
    }
    String nestedForeach = "B = FOREACH (GROUP A BY a) { " +
            overrideScalar +
            "D = FILTER A BY b % 2 == 1;" +
            "GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;" +
            "};";
    pig.registerQuery(nestedForeach);

    Schema actual = pig.dumpSchema("B");
    Schema expected = Utils.getSchemaFromString(expectedSchemaStr);
    assertEquals(expected, actual);
}
示例5: testShipOrcStorer
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Builds the physical plan for a script that stores through {@code OrcStorage} and
 * passes it to {@code checkPlan} together with the seven expected jar names.
 *
 * <p>The hive-shims jar suffix is "23" when {@code Utils.isHadoop23()} or
 * {@code Utils.isHadoop2()} returns true, and "20S" otherwise.
 */
@Test
public void testShipOrcStorer() throws Exception {
    String query = "a = load '1.txt' as (name:chararray, age:int, gpa:double);" +
            "store a into 'ooo' using OrcStorage;";
    PhysicalPlan plan = Util.buildPp(pigServer, query);

    boolean newerHadoop = Utils.isHadoop23() || Utils.isHadoop2();
    String shimsSuffix = newerHadoop ? "23" : "20S";

    String[] expectedJars = {
        "hive-common",
        "hive-exec",
        "hive-serde",
        "hive-shims-0." + shimsSuffix,
        "hive-shims-common-0",
        "hive-shims-common-secure",
        "kryo"
    };
    checkPlan(plan, expectedJars, 7, pigServer.getPigContext());
}
示例6: doAllPredecessors
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Recursively appends {@code node} to {@code fifo} after all of its predecessors.
 *
 * <p>Predecessors are the merge of the plan's regular and soft-link predecessors.
 * A node already present in {@code seen} is skipped; otherwise it is added to
 * {@code seen} and {@code fifo} only after its predecessors have been processed.
 *
 * @param node the operator to process
 * @param seen operators that have already been emitted; updated by this call
 * @param fifo output collection receiving operators, predecessors first
 * @throws FrontendException propagated from the recursive calls
 */
protected void doAllPredecessors(Operator node,
        Set<Operator> seen,
        Collection<Operator> fifo) throws FrontendException {
    if (seen.contains(node)) {
        // Already handled on an earlier path.
        return;
    }

    Collection<Operator> predecessors = Utils.mergeCollection(
            plan.getPredecessors(node), plan.getSoftLinkPredecessors(node));
    if (predecessors != null && !predecessors.isEmpty()) {
        // Do all our predecessors before ourself
        for (Operator predecessor : predecessors) {
            doAllPredecessors(predecessor, seen, fifo);
        }
    }

    // Now do ourself
    seen.add(node);
    fifo.add(node);
}
示例7: prepareToWrite
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Prepares this storer for writing: keeps the record writer, restores the schema
 * from the UDF properties, and creates the JSON factory.
 *
 * @param writer the record writer to keep for later tuple writes
 * @throws IOException if no schema string is stored under {@code SCHEMA_SIGNATURE},
 *         or if schema parsing fails
 */
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Keep the writer so it is available when tuples are written.
    this.writer = writer;

    // The schema string lives in the UDF properties keyed by this instance's signature.
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (serializedSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Rebuild the schema object from its string form.
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    // Build a Json factory
    jsonFactory = new JsonFactory();
}
示例8: getSchema
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Returns the schema for {@code location}, loading it through {@code JsonMetadata}
 * unless {@code dontLoadSchema} is set.
 *
 * <p>When both {@code signature} and the loaded schema are non-null, the schema is
 * optionally tagged with an {@code INPUT_FILE_NAME} or {@code INPUT_FILE_PATH} column
 * (depending on {@code tagFile} / {@code tagPath}) and its string form is stored in the
 * UDF properties under {@code signature + ".schema"}.
 *
 * @param location the location passed through to {@code JsonMetadata.getSchema}
 * @param job the job passed through to {@code JsonMetadata.getSchema}
 * @return the current value of the {@code schema} field
 * @throws IOException propagated from the schema lookup
 */
@Override
public ResourceSchema getSchema(String location,
        Job job) throws IOException {
    if (dontLoadSchema) {
        return schema;
    }

    schema = (new JsonMetadata()).getSchema(location, job, isSchemaOn);
    if (signature == null || schema == null) {
        return schema;
    }

    if (tagFile) {
        schema = Utils.getSchemaWithInputSourceTag(schema, "INPUT_FILE_NAME");
    } else if (tagPath) {
        schema = Utils.getSchemaWithInputSourceTag(schema, "INPUT_FILE_PATH");
    }

    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[] {signature});
    udfProps.setProperty(signature + ".schema", schema.toString());
    return schema;
}
示例9: doAllSuccessors
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Recursively appends {@code node} to {@code fifo} after all of its successors.
 *
 * <p>Successors are the merge of the plan's regular and soft-link successors.
 * A node already present in {@code seen} is skipped; otherwise it is added to
 * {@code seen} and {@code fifo} only after its successors have been processed.
 *
 * @param node the operator to process
 * @param seen operators that have already been emitted; updated by this call
 * @param fifo output collection receiving operators, successors first
 * @throws VisitorException propagated from the recursive calls
 */
protected void doAllSuccessors(O node,
        Set<O> seen,
        Collection<O> fifo) throws VisitorException {
    if (seen.contains(node)) {
        // Already handled on an earlier path.
        return;
    }

    Collection<O> successors = Utils.mergeCollection(
            mPlan.getSuccessors(node), mPlan.getSoftLinkSuccessors(node));
    if (successors != null && !successors.isEmpty()) {
        // Do all our successors before ourself
        for (O successor : successors) {
            doAllSuccessors(successor, seen, fifo);
        }
    }

    // Now do ourself
    seen.add(node);
    fifo.add(node);
}
示例10: doAllPredecessors
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Recursively appends {@code node} to {@code fifo} after all of its predecessors.
 *
 * <p>Predecessors are the merge of the plan's regular and soft-link predecessors.
 * A node already present in {@code seen} is skipped; otherwise it is added to
 * {@code seen} and {@code fifo} only after its predecessors have been processed.
 *
 * @param node the operator to process
 * @param seen operators that have already been emitted; updated by this call
 * @param fifo output collection receiving operators, predecessors first
 * @throws VisitorException propagated from the recursive calls
 */
protected void doAllPredecessors(O node,
        Set<O> seen,
        Collection<O> fifo) throws VisitorException {
    if (seen.contains(node)) {
        // Already handled on an earlier path.
        return;
    }

    Collection<O> predecessors = Utils.mergeCollection(
            mPlan.getPredecessors(node), mPlan.getSoftLinkPredecessors(node));
    if (predecessors != null && !predecessors.isEmpty()) {
        // Do all our predecessors before ourself
        for (O predecessor : predecessors) {
            doAllPredecessors(predecessor, seen, fifo);
        }
    }

    // Now do ourself
    seen.add(node);
    fifo.add(node);
}
示例11: testNewMergeNullSchemas
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Checks {@code LogicalSchema.merge} when one side has a tuple field with an empty
 * inner schema, in {@code Union} mode and in {@code LoadForEach} mode with the
 * arguments in both orders.
 */
@Test
public void testNewMergeNullSchemas() throws Throwable {
    LogicalSchema detailed = Utils.parseSchema( "a1:bytearray, b1:(b11:int, b12:float), c1:long" );
    LogicalSchema emptyTuple = Utils.parseSchema( "a2:bytearray, b2:(), c2:int" );

    // Union mode
    LogicalSchema unionMerged =
            LogicalSchema.merge(detailed, emptyTuple, LogicalSchema.MergeMode.Union);
    LogicalSchema unionExpected = Utils.parseSchema( "a1:bytearray, b1:(), c1:long" );
    assertTrue(LogicalSchema.equals(unionMerged, unionExpected, false, false));

    // LoadForEach mode, detailed schema first
    LogicalSchema forwardMerged =
            LogicalSchema.merge(detailed, emptyTuple, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema forwardExpected =
            Utils.parseSchema( "a1:bytearray, b1:(b11:int, b12:float), c1:long" );
    assertTrue(LogicalSchema.equals(forwardMerged, forwardExpected, false, false));

    // LoadForEach mode, empty-tuple schema first
    LogicalSchema reverseMerged =
            LogicalSchema.merge(emptyTuple, detailed, LogicalSchema.MergeMode.LoadForEach);
    LogicalSchema reverseExpected =
            Utils.parseSchema( "a2:bytearray, b2:(b11:int,b12:float), c2:int" );
    assertTrue(LogicalSchema.equals(reverseMerged, reverseExpected, false, false));
}
示例12: testIndirectSelfJoinRealias
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Joins a relation with a projection of itself, re-aliases the joined columns, and
 * checks both the dumped schemas at each step and the tuples finally stored.
 */
@Test
public void testIndirectSelfJoinRealias() throws Exception {
    setUp(ExecType.LOCAL);
    Data data = resetData(pigServer);
    Set<Tuple> remaining = Sets.newHashSet(tuple("a"), tuple("b"), tuple("c"));
    data.set("foo", Utils.getSchemaFromString("field1:chararray"), remaining);

    pigServer.registerQuery("A = load 'foo' using mock.Storage();");
    pigServer.registerQuery("B = foreach A generate *;");
    pigServer.registerQuery("C = join A by field1, B by field1;");
    Schema expectedC = Utils.getSchemaFromString("A::field1:chararray, B::field1:chararray");
    assertEquals(expectedC, pigServer.dumpSchema("C"));

    pigServer.registerQuery("D = foreach C generate B::field1, A::field1 as field2;");
    Schema expectedD = Utils.getSchemaFromString("B::field1:chararray, field2:chararray");
    assertEquals(expectedD, pigServer.dumpSchema("D"));

    pigServer.registerQuery("E = foreach D generate field1, field2;");
    Schema expectedE = Utils.getSchemaFromString("B::field1:chararray, field2:chararray");
    assertEquals(expectedE, pigServer.dumpSchema("E"));

    pigServer.registerQuery("F = foreach E generate field2;");
    pigServer.registerQuery("store F into 'foo_out' using mock.Storage();");

    List<Tuple> stored = data.get("foo_out");
    int expectedCount = remaining.size();
    int storedCount = stored.size();
    assertEquals("Expected size was "+expectedCount+" but was "+storedCount, expectedCount, storedCount);

    // Each stored tuple must match exactly one not-yet-matched input tuple.
    for (Tuple t : stored) {
        String notFound = "Should have found tuple "+t+" in expected: "+remaining;
        assertTrue(notFound, remaining.remove(t));
    }
    assertTrue("All expected tuples should have been found, remaining: "+remaining, remaining.isEmpty());
}
示例13: testDescribeDanglingBranch
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Registers a nested FOREACH whose {@code uniq_sym} alias is not used by the GENERATE
 * clause, then checks the dumped schema of the downstream alias {@code zzz} and the
 * two tuples it produces.
 */
@Test
// See PIG-2970
public void testDescribeDanglingBranch() throws Throwable {
    File f1 = createFile(new String[]{"NYSE\tIBM", "NASDAQ\tYHOO", "NASDAQ\tMSFT"});
    pigServer.registerQuery("daily = load '" + Util.generateURI(f1.toString(), pigServer.getPigContext())
            +"' as (exchange, symbol);");
    pigServer.registerQuery("grpd = group daily by exchange;");
    pigServer.registerQuery("unique = foreach grpd { sym = daily.symbol; uniq_sym = distinct sym; uniq_sym = distinct sym; generate group, daily;};");
    pigServer.registerQuery("zzz = foreach unique generate group;");

    Schema dumpedSchema = pigServer.dumpSchema("zzz") ;
    Schema expectedSchema = Utils.getSchemaFromString(
            "group: bytearray");
    Assert.assertEquals(expectedSchema, dumpedSchema);

    Iterator<Tuple> iter = pigServer.openIterator("zzz");
    // assertEquals (not assertTrue(equals)) so a failure reports the actual tuple text.
    Assert.assertEquals("(NYSE)", iter.next().toString());
    Assert.assertEquals("(NASDAQ)", iter.next().toString());
    Assert.assertFalse(iter.hasNext());
}
示例14: testIndirectSelfJoinData
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Joins a relation with a derived copy of itself (second column doubled) and checks
 * that the stored output contains exactly the expected four-column tuples.
 */
@Test
public void testIndirectSelfJoinData() throws Exception {
    setUp(ExecType.LOCAL);
    Data data = resetData(pigServer);
    Set<Tuple> input = Sets.newHashSet(tuple("a", 1), tuple("b", 2), tuple("c", 3));
    data.set("foo", Utils.getSchemaFromString("field1:chararray,field2:int"), input);

    pigServer.registerQuery("A = load 'foo' using mock.Storage();");
    pigServer.registerQuery("B = foreach A generate field1, field2*2 as field2;");
    pigServer.registerQuery("C = join A by field1, B by field1;");
    pigServer.registerQuery("D = foreach C generate A::field1 as field1_a, B::field1 as field1_b, A::field2 as field2_a, B::field2 as field2_b;");
    pigServer.registerQuery("store D into 'foo_out' using mock.Storage();");

    Set<Tuple> remaining = Sets.newHashSet(tuple("a", "a", 1, 2), tuple("b", "b", 2, 4), tuple("c", "c", 3, 6));
    List<Tuple> stored = data.get("foo_out");
    int expectedCount = remaining.size();
    int storedCount = stored.size();
    assertEquals("Expected size was "+expectedCount+" but was "+storedCount, expectedCount, storedCount);

    // Each stored tuple must match exactly one not-yet-matched expected tuple.
    for (Tuple t : stored) {
        String notFound = "Should have found tuple "+t+" in expected: "+remaining;
        assertTrue(notFound, remaining.remove(t));
    }
    assertTrue("All expected tuples should have been found, remaining: "+remaining, remaining.isEmpty());
}
示例15: testSchemaInStoreForDistinctLimit
import org.apache.pig.impl.util.Utils; //导入依赖的package包/类
/**
 * Compiles a load / distinct / limit / store script to a MapReduce plan and checks
 * that the store operator at the leaf of the reduce plan of the plan's first leaf
 * carries the same schema as the load statement.
 */
@Test
public void testSchemaInStoreForDistinctLimit() throws Exception {
    //test if the POStore in the 2nd mr plan (that stores the actual output)
    // has a schema
    String query = "a = load 'input1' as (a : int,b :float ,c : int);" +
            "b = distinct a;" +
            "c = limit b 10;" +
            "store c into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    MapReduceOper secondMrOper = mrPlan.getLeaves().get(0);
    POStore store = (POStore)secondMrOper.reducePlan.getLeaves().get(0);
    // Expected value goes first so a failure labels the two schemas correctly.
    assertEquals(
            "compare load and store schema",
            Utils.getSchemaFromString("a : int,b :float ,c : int"),
            store.getSchema()
    );
}