本文整理汇总了Java中org.apache.pig.PigServer.dumpSchema方法的典型用法代码示例。如果您正苦于以下问题:Java PigServer.dumpSchema方法的具体用法?Java PigServer.dumpSchema怎么用?Java PigServer.dumpSchema使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.PigServer的用法示例。
在下文中一共展示了PigServer.dumpSchema方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: registerScalarScript
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Registers a nested-FOREACH script over relation {@code A} and checks the
 * schema that Pig reports for alias {@code B}.
 *
 * @param useScalar when {@code true}, a nested {@code C = FILTER ...} statement
 *        is inserted at the start of the FOREACH block; when {@code false},
 *        nothing is inserted and only the top-level {@code C} is defined
 * @param expectedSchemaStr schema string the dumped schema of {@code B} must equal
 * @throws IOException declared for the Pig calls made by this helper
 */
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("A = load 'adata' AS (a: int, b: int);");
    // scalar: top-level alias C, always registered
    server.registerQuery("C = FOREACH A GENERATE *;");

    // Optional nested redefinition of C inside the FOREACH block.
    String nestedOverride;
    if (useScalar) {
        nestedOverride = "C = FILTER A BY b % 2 == 0; ";
    } else {
        nestedOverride = "";
    }

    StringBuilder script = new StringBuilder();
    script.append("B = FOREACH (GROUP A BY a) { ");
    script.append(nestedOverride);
    script.append("D = FILTER A BY b % 2 == 1;");
    script.append("GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;");
    script.append("};");
    server.registerQuery(script.toString());

    Schema actual = server.dumpSchema("B");
    Schema expected = Utils.getSchemaFromString(expectedSchemaStr);
    assertEquals(expected, actual);
}
示例2: jar
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Registers {@code mumu-pig.jar}, loads the nginx access log through the
 * custom {@code NginxLogLoader} and dumps the schema of the resulting
 * {@code nginxlog} alias.
 *
 * <p>An {@link IOException} from any of the Pig calls is printed and not
 * rethrown. The server is shut down in all cases, whether the calls succeed
 * or fail.
 */
public void jar() {
    PigServer pigServer = null;
    try {
        pigServer = new MumuPigConfiguration().mapreduce();
        pigServer.registerJar("mumu-pig.jar");
        pigServer.registerQuery("nginxlog = load '/mapreduce/nginxlog/access/input' using com.lovecws.mumu.pig.loader.NginxLogLoader();");
        pigServer.dumpSchema("nginxlog");
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // pigServer is still null if mapreduce() itself failed; guard against
        // that, and release the server on the success path as well.
        if (pigServer != null) {
            pigServer.shutdown();
        }
    }
}
示例3: testDescribeForeachNoSchema
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks that {@code dumpSchema} returns {@code null} for a FOREACH that
 * projects {@code *} from a relation loaded without an AS clause.
 */
@Test
public void testDescribeForeachNoSchema() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' ;");
    server.registerQuery("b = foreach a generate *;");
    // No schema was declared on the load, so none is expected for 'b'.
    assertNull(server.dumpSchema("b"));
}
示例4: testComplexCast
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a field without a declared type, casts it to a map whose values are
 * bags of {@code (i:int, j:int)} tuples, and verifies both the reported
 * schema and the two resulting rows.
 *
 * @throws IOException declared for the file-system and Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testComplexCast() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#{(1,2),(1,3)},134#]",
        "[key2#]",
    };
    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast", input);
    String query = "a = load '" + tmpDirName + "/testComplexCast' as (m);" +
        "b = foreach a generate ([{(i:int,j:int)}])m;";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("b");
    // JUnit's signature is (message, expected, actual); keeping that order
    // makes the failure message label the two values correctly.
    assertEquals("Checking expected schema", "{m: map[{(i: int,j: int)}]}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("b");

    // First row: 'key' maps to a bag, '134' is present with a null value.
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> first = (Map<?, ?>) t.get(0);
    Assert.assertTrue(first.containsKey("key"));
    Assert.assertTrue(first.containsKey("134"));
    Assert.assertTrue(first.get("key") instanceof DataBag);
    Assert.assertEquals("{(1,2),(1,3)}", first.get("key").toString());
    Assert.assertNull(first.get("134"));

    // Second row: 'key2' is present with a null value.
    Assert.assertTrue(it.hasNext());
    t = it.next();
    Map<?, ?> second = (Map<?, ?>) t.get(0);
    Assert.assertTrue(second.containsKey("key2"));
    Assert.assertNull(second.get("key2"));

    Assert.assertFalse(it.hasNext());
}
示例5: testUnionOnSchemaScopedColumnName
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Test UNION ONSCHEMA where a common column has an additional 'namespace'
 * part in the column name in one of the inputs. The union is checked in
 * both input orders ({@code f, l2} and {@code l2, f}); each must report the
 * schema {@code i: int, j: int} and produce the same rows.
 *
 * @throws IOException declared for the Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + "g = group l1 by i; "
        + "f = foreach g generate flatten(l1); "
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ;
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit's signature is (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1,2)",
                "(5,3)",
                "(1,2)",
                "(5,3)"
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ;
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
示例6: testEnsureProperSchema2
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Defines {@code badSchema} as {@code TestUDF$MirrorSchema} with a
 * three-column schema string, applies it in a FOREACH and asks for the
 * schema of the result. The test passes only if a
 * {@code FrontendException} is thrown somewhere in the body.
 */
@Test(expected = FrontendException.class)
public void testEnsureProperSchema2() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    String[] statements = {
        "DEFINE badSchema org.apache.pig.test.TestUDF$MirrorSchema('a:int, b:int, c:int');",
        "a = load 'thing';",
        "b = foreach a generate badSchema();"
    };
    // Register the statements one at a time, in declaration order.
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    server.dumpSchema("b");
}
示例7: testUDFMultiLevelOutputSchema
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Applies {@code MultiLevelDerivedUDF1}, {@code MultiLevelDerivedUDF2} and
 * {@code MultiLevelDerivedUDF3} to the same input and checks that the first
 * field of each resulting schema is typed {@code DataType.DOUBLE}.
 */
@Test
public void testUDFMultiLevelOutputSchema() throws Exception {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    server.registerQuery("A = LOAD 'a.txt';");
    server.registerQuery("B = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF1();");
    server.registerQuery("C = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF2();");
    server.registerQuery("D = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF3();");

    // Same check for every alias, in the order B, C, D.
    String[] aliases = {"B", "C", "D"};
    for (String alias : aliases) {
        Schema schema = server.dumpSchema(alias);
        assertTrue(schema.getField(0).type == DataType.DOUBLE);
    }
}
示例8: testProjectStarForeach
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks {@code generate * as (...)} in a FOREACH over a five-int-column
 * load: first with aliases given for only the leading three fields, then
 * with aliases for all five. The schema is verified in both cases and the
 * rows are verified for the second.
 *
 * @throws IOException declared for the Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testProjectStarForeach() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    // Both scenarios share the same load statement.
    final String loadStmt =
        " l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : int, e : int);";

    // Case 1: specifying the new aliases only for the initial set of fields.
    Util.registerMultiLineQuery(server, loadStmt + "f = foreach l1 generate * as (aa, bb, cc);");
    Schema wanted = Utils.getSchemaFromString("aa : int, bb : int, cc : int, d : int, e : int");
    Schema reported = server.dumpSchema("f");
    assertEquals("Checking expected schema", wanted, reported);

    // Case 2: specifying aliases for all fields.
    Util.registerMultiLineQuery(server, loadStmt + "f = foreach l1 generate * as (aa, bb, cc, dd, ee);");
    wanted = Utils.getSchemaFromString("aa : int, bb : int, cc : int, dd : int, ee : int");
    reported = server.dumpSchema("f");
    assertEquals("Checking expected schema", wanted, reported);

    Iterator<Tuple> rows = server.openIterator("f");
    String[] expectedRows = {
        "(10,20,30,40,50)",
        "(11,21,31,41,51)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(rows, expectedTuples);
}
示例9: testUnionOnSchemaScopedColumnNameBothInp2
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Test UNION ONSCHEMA where a common column has an additional 'namespace'
 * part in the column name in both the inputs. Both union inputs are
 * FOREACH results that flatten {@code l1} and {@code l2} after a cogroup,
 * in opposite flatten order.
 *
 * @throws IOException declared for the Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    // NOTE(review): 'cg2' is defined but never used; 'f2' reads from 'cg1'.
    // This looks like a typo for 'cg2' -- confirm before changing the query.
    String query =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + " l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); "
        + " cg1 = cogroup l1 by i, l2 by i; "
        + " f1 = foreach cg1 generate group as gkey, flatten(l1), flatten(l2); "
        + " cg2 = cogroup l2 by i, l1 by i; "
        + " f2 = foreach cg1 generate group as gkey, flatten(l2), flatten(l1); "
        + "u = union onschema f1, f2; " ;
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    Schema expectedSch =
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    // JUnit's signature is (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1,1,2,1,'2')",
                "(5,5,3,5,'3')",
                "(1,1,2,1,'2')",
                "(5,5,3,5,'3')",
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
示例10: describe
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Describes the schema of an alias in this pipeline by registering the
 * first bound query on a fresh {@code PigServer} and calling
 * {@code dumpSchema} on it. If no query is bound, an informational message
 * is logged and nothing else happens.
 *
 * @param alias the alias to be described
 * @throws IOException if describe fails
 */
public void describe(String alias) throws IOException {
    if (!queries.isEmpty()) {
        // Only the first bound query is used for the description.
        PigServer server = new PigServer(scriptContext.getPigContext(), false);
        registerQuery(server, queries.get(0));
        server.dumpSchema(alias);
    } else {
        LOG.info("No bound query to describe");
    }
}
示例11: testUnionOnSchemaFilter
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Test UNION ONSCHEMA followed by a FILTER on the union: the filtered
 * alias must report the merged schema {@code i, x, j} and yield the
 * expected rows.
 *
 * @throws IOException declared for the Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testUnionOnSchemaFilter() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, x : int);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
        + "u = union onschema l1, l2;"
        + "fil = filter u by i == 5 and (x is null or x != 1);"
    ;
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("fil");
    Schema expectedSch = Utils.getSchemaFromString("i: int, x: int, j: int");
    // JUnit's signature is (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("fil");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(5,null,3)",
                "(5,3,null)"
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
示例12: testDescribeForeach
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks that a FOREACH generating {@code field1 + 10} from an int column
 * is described as a single unnamed field of type {@code DataType.INTEGER}.
 */
@Test
public void testDescribeForeach() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = foreach a generate field1 + 10;");

    Schema actual = server.dumpSchema("b");

    // The generated expression carries no alias, hence the null field name.
    Schema.FieldSchema unnamedInt = new Schema.FieldSchema(null, DataType.INTEGER);
    Schema expected = new Schema(unnamedInt);
    assertEquals(expected, actual);
}
示例13: testDescribeCross
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks the schema reported for a CROSS of two relations: every field is
 * expected to be prefixed with its source alias, and the field declared
 * without a type ({@code field4}) is expected as {@code bytearray}.
 */
@Test
public void testDescribeCross() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cross a, b;");

    Schema actual = server.dumpSchema("c");

    // Left-hand fields first, then right-hand fields.
    String expectedText =
        "a::field1: int,a::field2: float,a::field3: chararray,"
        + "b::field4: bytearray,b::field5: double,b::field6: chararray";
    Schema expected = Utils.getSchemaFromString(expectedText);
    assertEquals(expected, actual);
}
示例14: testUnTypedMap
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a single row declared as an untyped map ({@code m:[]}) and
 * verifies the reported schema and that both map values come back as
 * {@code DataByteArray} instances with the expected string form.
 *
 * @throws IOException declared for the file-system and Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#1,key2#2]",
    };
    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);
    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("a");
    // JUnit's signature is (message, expected, actual).
    assertEquals("Checking expected schema", "{m: map[]}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("a");
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertTrue(t.get(0) instanceof Map);

    // Cast once instead of repeating a raw-typed cast on every assertion.
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof DataByteArray);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof DataByteArray);
    Assert.assertEquals("2", m.get("key2").toString());

    Assert.assertFalse(it.hasNext());
}
示例15: testUnionOnSchemaSuccOps
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Test UNION ONSCHEMA with a chain of operations after the union
 * (ORDER, LIMIT, FILTER): the final alias must report the merged schema
 * {@code i, x, y} and yield the expected row.
 *
 * @throws IOException declared for the Pig calls
 * @throws ParserException declared for the query registration
 */
@Test
public void testUnionOnSchemaSuccOps() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        " l1 = load '" + INP_FILE_2NUMS + "' as (i : int);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (x : int, y : int);"
        + "u = union onschema l1, l2;"
        + "o = order u by i desc;"
        + "lim = limit o 2;"
        + "fil = filter lim by i == 5 and y is null;"
    ;
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("fil");
    Schema expectedSch = Utils.getSchemaFromString("i: int, x: int, y: int");
    // JUnit's signature is (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("fil");
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(5,null,null)",
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}