本文整理汇总了Java中org.apache.pig.piggybank.storage.avro.PigSchema2Avro.setTupleIndex方法的典型用法代码示例。如果您正苦于以下问题：Java PigSchema2Avro.setTupleIndex方法的具体用法？Java PigSchema2Avro.setTupleIndex怎么用？Java PigSchema2Avro.setTupleIndex使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.piggybank.storage.avro.PigSchema2Avro的用法示例。
在下文中一共展示了PigSchema2Avro.setTupleIndex方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testRecordWithFieldSchema
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Runs a load / filter / project / store Pig script through AvroStorage and
 * hands the output directory and the expected Avro file to verifyResults.
 *
 * The STORE statement passes its options as one JSON-style string: the input
 * record file under "data" plus entries for field0, field1 and field3.
 */
@Test
public void testRecordWithFieldSchema() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String outputDir = outbasedir + "testRecordWithFieldSchema";
    final String expectedFile = basedir + "expected_testRecordWithFieldSchema.avro";
    // Remove the output directory before the script runs.
    deleteDirectory(new File(outputDir));

    // NOTE(review): the literal has a blank before the closing quote of the
    // LOAD path; it is reproduced exactly as in the original.
    final String loadStmt =
        " avro = LOAD '" + Util.encodeEscape(testRecordFile)
            + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();";
    final String filterStmt = " avro1 = FILTER avro BY member_id > 1211;";
    final String projectStmt =
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;";

    // Single-quoted JSON-style option string given to AvroStorage on STORE.
    final String storeOptions =
        "'{\"data\": \"" + Util.encodeEscape(testRecordFile) + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }'";
    final String storeStmt =
        " STORE avro2 INTO '" + outputDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + storeOptions
            + ");";

    final String[] script = {loadStmt, filterStmt, projectStmt, storeStmt};
    testAvroStorage(script);
    verifyResults(outputDir, expectedFile);
}
示例2: testRecordWithFieldSchemaFromText
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads the text fixture with an explicit Pig schema, filters and projects
 * it, stores it through AvroStorage, and hands the output directory and the
 * expected Avro file to verifyResults.
 *
 * The STORE options are one JSON-style string whose "data" entry points at
 * the record file, followed by entries for field0, field1 and field3.
 */
@Test
public void testRecordWithFieldSchemaFromText() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String resultDir = outbasedir + "testRecordWithFieldSchemaFromText";
    final String goldenFile = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(resultDir));

    // Pig schema declared on the LOAD statement.
    final String textSchema =
        "(member_id:int, browser_id:chararray, tracking_time:long, "
            + "act_content:bag{inner:tuple(key:chararray, value:chararray)})";

    final String[] script = new String[] {
        " avro = LOAD '" + Util.encodeEscape(testTextFile) + "' AS " + textSchema + ";",
        " avro1 = FILTER avro BY member_id > 1211;",
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;",
        " STORE avro2 INTO '" + resultDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + "'{\"data\": \"" + Util.encodeEscape(testRecordFile) + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }');"
    };

    testAvroStorage(script);
    verifyResults(resultDir, goldenFile);
}
示例3: testRecordWithFieldSchemaFromTextWithSchemaFile
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Same text-to-Avro pipeline as the other field-schema tests, except that the
 * STORE options name a schema file ("schema_file") instead of a data file.
 * The output directory and the expected Avro file go to verifyResults.
 */
@Test
public void testRecordWithFieldSchemaFromTextWithSchemaFile() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String outDir = outbasedir + "testRecordWithFieldSchemaFromTextWithSchemaFile";
    final String expectedAvro = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(outDir));

    final String load =
        " avro = LOAD '" + Util.encodeEscape(testTextFile)
            + "' AS (member_id:int, browser_id:chararray, tracking_time:long, act_content:bag{inner:tuple(key:chararray, value:chararray)});";
    final String filter = " avro1 = FILTER avro BY member_id > 1211;";
    final String project =
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;";

    // Per-field entries shared by the option string below.
    final String fieldEntries =
        " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" ";
    final String store =
        " STORE avro2 INTO '" + outDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + "'{\"schema_file\": \"" + Util.encodeEscape(testRecordSchema) + "\" ,"
            + fieldEntries
            + " }');";

    testAvroStorage(new String[] {load, filter, project, store});
    verifyResults(outDir, expectedAvro);
}
示例4: testFileWithNoExtension
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Runs the load / filter / project / store script against the fixture
 * referenced by testNoExtensionFile, both as the LOAD source and as the
 * "data" entry of the STORE options. The output directory and the expected
 * Avro file go to verifyResults.
 */
@Test
public void testFileWithNoExtension() throws IOException {
    PigSchema2Avro.setTupleIndex(4);
    final String targetDir = outbasedir + "testFileWithNoExtension";
    final String referenceFile = basedir + "expected_testFileWithNoExtension.avro";
    deleteDirectory(new File(targetDir));

    final String avroStorage = "org.apache.pig.piggybank.storage.avro.AvroStorage (";

    // NOTE(review): the LOAD path literal carries a blank before its closing
    // quote; kept exactly as in the original.
    final String loadStmt =
        " avro = LOAD '" + Util.encodeEscape(testNoExtensionFile) + " ' USING " + avroStorage + ");";
    final String storeStmt =
        " STORE avro2 INTO '" + targetDir + "' "
            + " USING " + avroStorage
            + "'{\"data\": \"" + Util.encodeEscape(testNoExtensionFile) + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }');";

    final String[] script = {
        loadStmt,
        " avro1 = FILTER avro BY member_id > 1211;",
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;",
        storeStmt
    };
    testAvroStorage(script);
    verifyResults(targetDir, referenceFile);
}
示例5: testRecordWithFieldSchemaFromTextWithSchemaFile2
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Variant of the schema-file test in which the AvroStorage options on STORE
 * are given as separate quoted key/value arguments ('schema_file', path,
 * 'field0', 'int', ...) instead of a single JSON-style string. The output
 * directory and the expected Avro file go to verifyResults.
 */
@Test
public void testRecordWithFieldSchemaFromTextWithSchemaFile2() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String outputDir = outbasedir + "testRecordWithFieldSchemaFromTextWithSchemaFile2";
    final String expectedFile = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(outputDir));

    final String loadStmt =
        " avro = LOAD '" + Util.encodeEscape(testTextFile)
            + "' AS (member_id:int, browser_id:chararray, tracking_time:long, act_content:bag{inner:tuple(key:chararray, value:chararray)});";

    // Comma-separated key/value argument list for the AvroStorage constructor.
    final String storeArgs =
        "'schema_file', '" + Util.encodeEscape(testRecordSchema) + "',"
            + "'field0','int',"
            + "'field1','def:browser_id',"
            + "'field3','def:act_content'";
    final String storeStmt =
        " STORE avro2 INTO '" + outputDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + storeArgs
            + ");";

    final String[] script = {
        loadStmt,
        " avro1 = FILTER avro BY member_id > 1211;",
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;",
        storeStmt
    };
    testAvroStorage(script);
    verifyResults(outputDir, expectedFile);
}
示例6: testRecordWithFieldSchema
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads testRecordFile through AvroStorage, keeps rows with member_id above
 * 1211, projects four fields and stores them back through AvroStorage with a
 * JSON-style option string. The output directory and the expected Avro file
 * go to verifyResults. Paths are concatenated into the script unescaped.
 */
@Test
public void testRecordWithFieldSchema() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String resultDir = outbasedir + "testRecordWithFieldSchema";
    final String goldenFile = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(resultDir));

    final String storageClass = "org.apache.pig.piggybank.storage.avro.AvroStorage";

    // NOTE(review): the LOAD path literal ends with a blank before the closing
    // quote; kept exactly as in the original.
    final String load = " avro = LOAD '" + testRecordFile + " ' USING " + storageClass + " ();";
    final String store =
        " STORE avro2 INTO '" + resultDir + "' "
            + " USING " + storageClass + " ("
            + "'{\"data\": \"" + testRecordFile + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }');";

    final String[] script = new String[] {
        load,
        " avro1 = FILTER avro BY member_id > 1211;",
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;",
        store
    };
    testAvroStorage(script);
    verifyResults(resultDir, goldenFile);
}
示例7: testRecordWithFieldSchemaFromText
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads testTextFile with an explicit Pig schema, filters and projects it,
 * and stores it through AvroStorage using a JSON-style option string whose
 * "data" entry is testRecordFile. The output directory and the expected Avro
 * file go to verifyResults. Paths are concatenated into the script unescaped.
 */
@Test
public void testRecordWithFieldSchemaFromText() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String outDir = outbasedir + "testRecordWithFieldSchemaFromText";
    final String expectedAvro = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(outDir));

    final String loadFromText =
        " avro = LOAD '" + testTextFile
            + "' AS (member_id:int, browser_id:chararray, tracking_time:long, act_content:bag{inner:tuple(key:chararray, value:chararray)});";
    final String keepLargeIds = " avro1 = FILTER avro BY member_id > 1211;";
    final String pickColumns =
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;";

    // Option string handed to AvroStorage on STORE.
    final String options =
        "'{\"data\": \"" + testRecordFile + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }'";
    final String storeAsAvro =
        " STORE avro2 INTO '" + outDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + options + ");";

    testAvroStorage(new String[] {loadFromText, keepLargeIds, pickColumns, storeAsAvro});
    verifyResults(outDir, expectedAvro);
}
示例8: testRecordWithFieldSchemaFromTextWithSchemaFile
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads testTextFile with an explicit Pig schema and stores the filtered,
 * projected rows through AvroStorage, naming testRecordSchema under
 * "schema_file" in the JSON-style options. The output directory and the
 * expected Avro file go to verifyResults. Paths are concatenated unescaped.
 */
@Test
public void testRecordWithFieldSchemaFromTextWithSchemaFile() throws IOException {
    PigSchema2Avro.setTupleIndex(1);
    final String targetDir = outbasedir + "testRecordWithFieldSchemaFromTextWithSchemaFile";
    final String referenceFile = basedir + "expected_testRecordWithFieldSchema.avro";
    deleteDirectory(new File(targetDir));

    final String pigSchema =
        "(member_id:int, browser_id:chararray, tracking_time:long, "
            + "act_content:bag{inner:tuple(key:chararray, value:chararray)})";

    final String[] script = {
        " avro = LOAD '" + testTextFile + "' AS " + pigSchema + ";",
        " avro1 = FILTER avro BY member_id > 1211;",
        " avro2 = FOREACH avro1 GENERATE member_id, browser_id, tracking_time, act_content ;",
        " STORE avro2 INTO '" + targetDir + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + "'{\"schema_file\": \"" + testRecordSchema + "\" ,"
            + " \"field0\": \"int\", "
            + " \"field1\": \"def:browser_id\", "
            + " \"field3\": \"def:act_content\" "
            + " }');"
    };

    testAvroStorage(script);
    verifyResults(targetDir, referenceFile);
}
示例9: testRecordWithSplit
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Groups the Avro records by member_id, counts each group, and stores the
 * same relation twice through AvroStorage: once with a JSON-style option
 * string ("index": 1 plus an explicit record schema) and once with the
 * key/value arguments ('index', '2'). Each output directory is passed to
 * verifyResults with its own expected file.
 */
@Test
public void testRecordWithSplit() throws IOException {
    PigSchema2Avro.setTupleIndex(0);
    final String firstOut = outbasedir + "testRecordSplit1";
    final String secondOut = outbasedir + "testRecordSplit2";
    final String firstExpected = basedir + "expected_testRecordSplit1.avro";
    final String secondExpected = basedir + "expected_testRecordSplit2.avro";
    deleteDirectory(new File(firstOut));
    deleteDirectory(new File(secondOut));

    // NOTE(review): the LOAD path literal ends with a blank before the closing
    // quote; kept exactly as in the original.
    final String load =
        " avro = LOAD '" + Util.encodeEscape(testRecordFile)
            + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();";

    // Avro record schema (member_id:int, count:long) embedded in the options.
    final String resultSchema =
        "{\"type\":\"record\", "
            + " \"name\":\"result\", "
            + " \"fields\":[ {\"name\":\"member_id\",\"type\":\"int\"}, "
            + "{\"name\":\"count\", \"type\":\"long\"} "
            + "]"
            + "}";
    final String firstStore =
        " STORE sc INTO '" + firstOut + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + "'{\"index\": 1, "
            + " \"schema\": " + resultSchema
            + " }');";

    // NOTE(review): here the blank sits inside the quoted path and no blank
    // separates the closing quote from USING; kept exactly as in the original.
    final String secondStore =
        " STORE sc INTO '" + secondOut
            + " 'USING org.apache.pig.piggybank.storage.avro.AvroStorage ('index', '2');";

    final String[] script = {
        load,
        " groups = GROUP avro BY member_id;",
        " sc = FOREACH groups GENERATE group AS key, COUNT(avro) AS cnt;",
        firstStore,
        secondStore
    };
    testAvroStorage(script);
    verifyResults(firstOut, firstExpected);
    verifyResults(secondOut, secondExpected);
}
示例10: testRecordWithSplitFromText
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads the text fixture with an explicit Pig schema, groups by member_id,
 * counts each group, and stores the result twice through AvroStorage: once
 * with a JSON-style option string ("index": 1 plus a record schema) and once
 * with the key/value arguments ('index', '2'). Each output directory is
 * passed to verifyResults with its own expected file.
 */
@Test
public void testRecordWithSplitFromText() throws IOException {
    PigSchema2Avro.setTupleIndex(0);
    final String outA = outbasedir + "testRecordSplitFromText1";
    final String outB = outbasedir + "testRecordSplitFromText2";
    final String expectedA = basedir + "expected_testRecordSplitFromText1.avro";
    final String expectedB = basedir + "expected_testRecordSplitFromText2.avro";
    deleteDirectory(new File(outA));
    deleteDirectory(new File(outB));

    final String load =
        " avro = LOAD '" + Util.encodeEscape(testTextFile)
            + "' AS (member_id:int, browser_id:chararray, tracking_time:long, act_content:bag{inner:tuple(key:chararray, value:chararray)});";

    // First STORE: assembled piece by piece, options as one JSON-style string.
    final String storeWithSchema = new StringBuilder()
        .append(" STORE sc INTO '").append(outA).append("' ")
        .append(" USING org.apache.pig.piggybank.storage.avro.AvroStorage (")
        .append("'{\"index\": 1, ")
        .append(" \"schema\": {\"type\":\"record\", ")
        .append(" \"name\":\"result\", ")
        .append(" \"fields\":[ {\"name\":\"member_id\",\"type\":\"int\"}, ")
        .append("{\"name\":\"count\", \"type\":\"long\"} ")
        .append("]")
        .append("}")
        .append(" }');")
        .toString();

    // NOTE(review): the blank sits inside the quoted path and no blank
    // separates the closing quote from USING; kept exactly as in the original.
    final String storeWithIndex = new StringBuilder()
        .append(" STORE sc INTO '").append(outB)
        .append(" 'USING org.apache.pig.piggybank.storage.avro.AvroStorage ('index', '2');")
        .toString();

    final String[] script = new String[] {
        load,
        " groups = GROUP avro BY member_id;",
        " sc = FOREACH groups GENERATE group AS key, COUNT(avro) AS cnt;",
        storeWithSchema,
        storeWithIndex
    };
    testAvroStorage(script);
    verifyResults(outA, expectedA);
    verifyResults(outB, expectedB);
}
示例11: testLoadwithNullValues
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Loads an Avro file through AvroStorage, orders it by name, stores it via
 * mock.Storage, and checks the four resulting rows and their schema.
 *
 * Schema for the generated avro file test_loadavrowithnulls.avro:
 * <pre>
 * ["null",{"type":"record","name":"TUPLE_0",
 *   "fields":[
 *     {"name":"name","type":["null","string"],"doc":"autogenerated from Pig Field Schema"},
 *     {"name":"age","type":["null","int"],"doc":"autogenerated from Pig Field Schema"},
 *     {"name":"gpa","type":["null","double"],"doc":"autogenerated from Pig Field Schema"}]}]
 * </pre>
 */
@Test
public void testLoadwithNullValues() throws IOException {
    // The input is supposed to contain empty tuples.
    PigSchema2Avro.setTupleIndex(0);
    final Data mockData = resetData(pigServerLocal);
    final String outputDir = outbasedir + "testLoadwithNulls";
    deleteDirectory(new File(outputDir));

    final String loadStmt =
        " A = load '" + testLoadwithNullValues + "' USING "
            + " org.apache.pig.piggybank.storage.avro.AvroStorage(); ";
    final String orderStmt = " B = order A by name;";
    final String storeStmt = " store B into '" + outputDir + "' USING mock.Storage();";
    testAvroStorage(new String[] {loadStmt, orderStmt, storeStmt});

    final List<Tuple> rows = mockData.get(outputDir);
    assertEquals(rows + " size", 4, rows.size());
    assertEquals(schema("name:chararray,age:int,gpa:double"), mockData.getSchema(outputDir));

    // Rows are ordered by name; the first two are expected to be null tuples.
    assertEquals(tuple((String) null), rows.get(0));
    assertEquals(tuple((String) null), rows.get(1));
    assertEquals(tuple("calvin ellison", 24, 0.71), rows.get(2));
    assertEquals(tuple("wendy johnson", 60, 0.07), rows.get(3));
}
示例12: testUserDefinedLoadSchema
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Verifies that a user-specified schema correctly maps to the input schemas.
 *
 * Input Avro files have the following schemas:
 *   name:"string", address:[customField1:"int", addressLine:"string"]
 *   address:[addressLine:"string", customField2:"int"], name:"string"
 * The user Avro schema looks like this:
 *   name:"string", address:[customField1:"int", customField2:"int", customField3:"int"]
 *
 * The test is meant to confirm that AvroStorage maps fields from the writer
 * schema to the reader schema, dropping, adding, and reordering fields where
 * needed. The output directory and the expected Avro file go to verifyResults.
 */
@Test
public void testUserDefinedLoadSchema() throws IOException {
    PigSchema2Avro.setTupleIndex(2);
    final String outputDir = outbasedir + "testUserDefinedLoadSchema";
    final String expectedFile = basedir + "expected_testUserDefinedLoadSchema.avro";

    // Top-level "name" field of the employee record.
    final String nameField =
        "{ \"default\": \"***\", \"type\": \"string\", \"name\": \"name\" }, ";
    // Nested addressDetails record with three int fields.
    final String addressType =
        "{ "
            + "\"type\": \"record\", \"name\": \"addressDetails\", \"fields\": [ "
            + "{ \"default\": 0, \"type\": \"int\", \"name\": \"customField1\" }, "
            + "{ \"default\": 0, \"type\": \"int\", \"name\": \"customField2\" }, "
            + "{ \"default\": 0, \"type\": \"int\", \"name\": \"customField3\" } "
            + "] "
            + "} ";
    final String addressField = "{ \"name\": \"address\", \"type\": " + addressType + "} ";
    final String readerSchema =
        "{\"type\": \"record\", \"name\": \"employee\", \"fields\": [ "
            + nameField
            + addressField
            + "] } ";

    deleteDirectory(new File(outputDir));

    final String loadStmt =
        " in = LOAD '" + testUserDefinedLoadSchemaFile
            + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ('schema', '"
            + readerSchema + "');";
    final String orderStmt = " o = ORDER in BY name;";
    final String storeStmt =
        " STORE o INTO '" + outputDir
            + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();";

    testAvroStorage(new String[] {loadStmt, orderStmt, storeStmt});
    verifyResults(outputDir, expectedFile);
}
示例13: testLoadwithNullValues
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
// Schema for the generated avro file test_loadavrowithnulls.avro
// ["null",{"type":"record","name":"TUPLE_0",
// "fields":[
// {"name":"name","type":["null","string"],"doc":"autogenerated from Pig Field Schema"},
// {"name":"age","type":["null","int"],"doc":"autogenerated from Pig Field Schema"},
// {"name":"gpa","type":["null","double"],"doc":"autogenerated from Pig Field Schema"}]}]
//
// The test loads that file through AvroStorage, orders it by name, stores it
// via mock.Storage and asserts on the four rows and the schema read back.
@Test
public void testLoadwithNullValues() throws IOException {
    // Input is supposed to have empty tuples.
    PigSchema2Avro.setTupleIndex(0);
    final Data store = resetData(pigServerLocal);
    final String target = outbasedir + "testLoadwithNulls";
    deleteDirectory(new File(target));

    final String[] script = new String[] {
        " A = load '" + testLoadwithNullValues + "' USING "
            + " org.apache.pig.piggybank.storage.avro.AvroStorage(); ",
        " B = order A by name;",
        " store B into '" + target + "' USING mock.Storage();"
    };
    testAvroStorage(script);

    final List<Tuple> stored = store.get(target);
    assertEquals(stored + " size", 4, stored.size());
    assertEquals(schema("name:chararray,age:int,gpa:double"), store.getSchema(target));

    // Data is sorted by name.
    assertEquals(tuple((String) null), stored.get(0));
    assertEquals(tuple((String) null), stored.get(1));
    assertEquals(tuple("calvin ellison", 24, 0.71), stored.get(2));
    assertEquals(tuple("wendy johnson", 60, 0.07), stored.get(3));
}
示例14: testMultipleLoadStore
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Round-trips two mock relations through AvroStorage in a single script.
 *
 * "foo" (three int columns) and "bar" (three chararray columns) are stored
 * with AvroStorage under separate sub-directories, loaded back in a second
 * script, stored into mock.Storage, and compared with the original rows and
 * schemas.
 */
@Test
public void testMultipleLoadStore() throws Exception {
    PigSchema2Avro.setTupleIndex(0);
    final Data mockData = resetData(pigServerLocal);
    mockData.set("foo", tuple(1, 2, 3), tuple(4, 5, 6), tuple(7, 8, 9));
    mockData.set("bar", tuple("a", "b", "c"), tuple("d", "e", "f"), tuple("g", "h", "i"));

    final String outputDir = outbasedir + "testMultipleLoadStore";
    deleteDirectory(new File(outputDir));

    final String avroStorage = "org.apache.pig.piggybank.storage.avro.AvroStorage();";

    // First script: write both relations out as Avro.
    final String[] writeScript = {
        "A = LOAD 'foo' USING mock.Storage() as (a1:int, a2:int, a3:int);",
        "B = LOAD 'bar' USING mock.Storage() as (b1:chararray, b2:chararray, b3:chararray);",
        "STORE A into '" + outputDir + "/A' USING " + avroStorage,
        "STORE B into '" + outputDir + "/B' USING " + avroStorage
    };
    testAvroStorage(writeScript);

    // Second script: read the Avro output back into mock storage.
    final String[] readScript = {
        "C = LOAD '" + outputDir + "/A' USING " + avroStorage,
        "D = LOAD '" + outputDir + "/B' USING " + avroStorage,
        "STORE C into 'foo-actual' USING mock.Storage();",
        "STORE D into 'bar-actual' USING mock.Storage();"
    };
    testAvroStorage(readScript);

    assertEquals(mockData.get("foo"), mockData.get("foo-actual"));
    assertEquals(mockData.get("bar"), mockData.get("bar-actual"));
    assertEquals("{a1: int,a2: int,a3: int}", mockData.getSchema("foo-actual").toString());
    assertEquals(
        "{b1: chararray,b2: chararray,b3: chararray}",
        mockData.getSchema("bar-actual").toString());
}
示例15: testRecordWithSplit
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro; //导入方法依赖的package包/类
/**
 * Groups the Avro records by member_id, counts each group, and stores the
 * result twice through AvroStorage: once with a JSON-style option string
 * ("index": 1 plus a record schema) and once with the key/value arguments
 * ('index', '2'). Each output directory is passed to verifyResults with its
 * own expected file. The input path is concatenated into the script unescaped.
 */
@Test
public void testRecordWithSplit() throws IOException {
    PigSchema2Avro.setTupleIndex(0);
    final String splitOut1 = outbasedir + "testRecordSplit1";
    final String splitOut2 = outbasedir + "testRecordSplit2";
    final String golden1 = basedir + "expected_testRecordSplit1.avro";
    final String golden2 = basedir + "expected_testRecordSplit2.avro";
    deleteDirectory(new File(splitOut1));
    deleteDirectory(new File(splitOut2));

    // The two fields of the "result" record written by the first STORE.
    final String memberIdField = "{\"name\":\"member_id\",\"type\":\"int\"}, ";
    final String countField = "{\"name\":\"count\", \"type\":\"long\"} ";
    final String jsonOptions =
        "'{\"index\": 1, "
            + " \"schema\": {\"type\":\"record\", "
            + " \"name\":\"result\", "
            + " \"fields\":[ " + memberIdField
            + countField
            + "]"
            + "}"
            + " }'";

    // NOTE(review): in the LOAD and in the second STORE a blank precedes the
    // closing quote of the path; both are kept exactly as in the original.
    final String[] script = new String[] {
        " avro = LOAD '" + testRecordFile + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();",
        " groups = GROUP avro BY member_id;",
        " sc = FOREACH groups GENERATE group AS key, COUNT(avro) AS cnt;",
        " STORE sc INTO '" + splitOut1 + "' "
            + " USING org.apache.pig.piggybank.storage.avro.AvroStorage ("
            + jsonOptions
            + ");",
        " STORE sc INTO '" + splitOut2
            + " 'USING org.apache.pig.piggybank.storage.avro.AvroStorage ('index', '2');"
    };
    testAvroStorage(script);
    verifyResults(splitOut1, golden1);
    verifyResults(splitOut2, golden2);
}