本文整理汇总了Java中org.apache.pig.PigServer.setBatchOn方法的典型用法代码示例。如果您正苦于以下问题:Java PigServer.setBatchOn方法的具体用法?Java PigServer.setBatchOn怎么用?Java PigServer.setBatchOn使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.PigServer
的用法示例。
在下文中一共展示了PigServer.setBatchOn方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setup
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Builds a local-mode PigServer for the given script, forcing dry-run mode so
 * nothing is actually sent to a Vespa endpoint.
 *
 * @param script path of the Pig script to register
 * @param conf   Hadoop configuration to use; a fresh HdfsConfiguration when null
 * @return a PigServer with batch mode enabled and the script registered
 * @throws Exception if the server cannot be created or the script fails to parse
 */
private PigServer setup(String script, Configuration conf) throws Exception {
    Configuration effectiveConf = (conf != null) ? conf : new HdfsConfiguration();
    // Dry-run: plans are built and validated, but no data is fed anywhere.
    effectiveConf.setIfUnset(VespaConfiguration.DRYRUN, "true");
    effectiveConf.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");
    // Parameter substitutions - can also be set by configuration
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");
    PigServer server = new PigServer(ExecType.LOCAL, effectiveConf);
    server.setBatchOn();
    server.registerScript(script, substitutions);
    return server;
}
示例2: testNumerOfColumnsWhenDatePartitionedFiles
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a date-partitioned directory over the full [startingDate, endingDate]
 * range and checks every tuple's arity and the total row count.
 */
@Test
public void testNumerOfColumnsWhenDatePartitionedFiles() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(funcSpecString);
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));
    pig.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");
    Iterator<Tuple> rows = pig.openIterator("a");
    int rowCount = 0;
    // This result iterator signals exhaustion with a null tuple.
    for (Tuple tuple = rows.next(); tuple != null; tuple = rows.next()) {
        // 3 declared columns plus, presumably, the date partition column - 4 total.
        Assert.assertEquals(4, tuple.size());
        rowCount++;
    }
    Assert.assertEquals(datePartitionedRowCount, rowCount);
}
示例3: runQuery
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV file and stores it via MultiStorage with the given
 * compression type, executing both statements as a single Pig batch.
 *
 * @param outputPath      directory MultiStorage writes into (also passed as its
 *                        constructor argument)
 * @param compressionType compression scheme handed to MultiStorage
 * @throws Exception if temp-file creation, query registration, or batch
 *                   execution fails
 */
// Fix: the original declared "throws Exception, ExecException, IOException,
// FrontendException" - the latter three are subsumed by Exception, so the
// redundant entries are dropped (callers catching Exception are unaffected).
private void runQuery(String outputPath, String compressionType)
        throws Exception {
    // Materialize the in-memory test data as a temporary CSV file.
    String filename = TestHelper.createTempFile(data, "");
    PigServer pig = new PigServer(LOCAL);
    // Double the backslashes so Windows paths survive Pig Latin string parsing.
    filename = filename.replace("\\", "\\\\");
    patternString = patternString.replace("\\", "\\\\");
    String query = "A = LOAD '" + Util.encodeEscape(filename)
            + "' USING PigStorage(',') as (a,b,c);";
    String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
            + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
            + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";
    // Run both statements in one batch so the STORE is planned with the LOAD.
    pig.setBatchOn();
    pig.registerQuery(query);
    pig.registerQuery(query2);
    pig.executeBatch();
}
示例4: test1DayDatePartitionedFiles
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Restricts the date-partitioned load to a single day (range start == range end)
 * and checks the resulting row count.
 */
@Test
public void test1DayDatePartitionedFiles() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + startingDate + "')";
    System.out.println(funcSpecString);
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));
    pig.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");
    Iterator<Tuple> rows = pig.openIterator("a");
    int rowCount = 0;
    // This result iterator signals exhaustion with a null tuple.
    while (rows.next() != null) {
        rowCount++;
    }
    // A single day's partition is expected to hold exactly 50 rows.
    Assert.assertEquals(50, rowCount);
}
示例5: runQuery
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV file and stores it via MultiStorage with the given
 * compression type, executing both statements as a single Pig batch.
 *
 * @param outputPath      directory MultiStorage writes into (also passed as its
 *                        constructor argument)
 * @param compressionType compression scheme handed to MultiStorage
 * @throws Exception if temp-file creation, query registration, or batch
 *                   execution fails
 */
// Fix: the original declared "throws Exception, ExecException, IOException,
// FrontendException" - the latter three are subsumed by Exception, so the
// redundant entries are dropped (callers catching Exception are unaffected).
private void runQuery(String outputPath, String compressionType)
        throws Exception {
    // Materialize the in-memory test data as a temporary CSV file.
    String filename = TestHelper.createTempFile(data, "");
    PigServer pig = new PigServer(LOCAL);
    // Double the backslashes so Windows paths survive Pig Latin string parsing.
    filename = filename.replace("\\", "\\\\");
    patternString = patternString.replace("\\", "\\\\");
    String query = "A = LOAD '" + Util.encodeEscape(filename)
            + "' USING PigStorage(',') as (a,b,c);";
    String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
            + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
            + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";
    // Run both statements in one batch so the STORE is planned with the LOAD.
    pig.setBatchOn();
    pig.registerQuery(query);
    pig.registerQuery(query2);
    pig.executeBatch();
}
示例6: testArrayWithSnappyCompression
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Stores an Avro array with Snappy compression enabled via job properties, then
 * verifies the written output against the expected golden file.
 */
@Test
public void testArrayWithSnappyCompression() throws IOException {
String output= outbasedir + "testArrayWithSnappyCompression";
String expected = basedir + "expected_testArrayDefault.avro";
// Start from a clean output directory so stale results cannot mask failures.
deleteDirectory(new File(output));
// NOTE(review): "mapred.*" are the legacy Hadoop property names; newer Hadoop
// uses "mapreduce.output.fileoutputformat.compress*" - confirm against the
// Hadoop version this module targets.
Properties properties = new Properties();
properties.setProperty("mapred.output.compress", "true");
properties.setProperty("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
properties.setProperty("avro.output.codec", "snappy");
PigServer pigServer = new PigServer(ExecType.LOCAL, properties);
pigServer.setBatchOn();
// NOTE(review): there is a space between the escaped path and the closing
// quote in the LOAD statement (" '") - verify this is intentional, otherwise
// the loaded path carries a trailing blank.
String [] queries = {
" in = LOAD '" + Util.encodeEscape(testArrayFile) + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();",
" STORE in INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();"
};
for (String query: queries){
pigServer.registerQuery(query);
}
pigServer.executeBatch();
// Compare the written Avro output against the golden file, expecting the
// snappy codec to have been applied.
verifyResults(output, expected, "snappy");
}
示例7: testReadingSingleFileNoProjections
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Reads a single non-partitioned file with no projection pushdown and checks
 * that every tuple exposes all three declared chararray columns.
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String dataFilePath = simpleDataFile.getAbsolutePath();
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));
    pig.registerQuery("a = LOAD '" + Util.encodeEscape(dataFilePath) + "' using " + funcSpecString
            + ";");
    Iterator<Tuple> rows = pig.openIterator("a");
    int rowCount = 0;
    // This result iterator signals exhaustion with a null tuple.
    for (Tuple tuple = rows.next(); tuple != null; tuple = rows.next()) {
        assertEquals(3, tuple.size());
        assertEquals(DataType.CHARARRAY, tuple.getType(0));
        rowCount++;
    }
    Assert.assertEquals(simpleRowCount, rowCount);
}
示例8: testFilterGroupCountStore
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs a filter -> group -> count -> store pipeline in batch mode and checks
 * that example data (ILLUSTRATE-style output) can be derived for it.
 */
@Test
public void testFilterGroupCountStore() throws Exception {
// Reserve a unique path, then delete the file so only the name remains as the
// store target; deleteOnExit cleans up whatever the store may create there.
File out = File.createTempFile("testFilterGroupCountStoreOutput", "");
out.deleteOnExit();
out.delete();
PigServer pigServer = new PigServer(pigContext);
pigServer.setBatchOn();
// NOTE(review): A.toString() is interpolated without surrounding quotes, so it
// presumably already renders as a quoted Pig path - confirm A's type/toString.
pigServer.registerQuery("A = load " + A.toString() + " as (x, y);");
pigServer.registerQuery("B = filter A by x < 5;");
pigServer.registerQuery("C = group B by x;");
pigServer.registerQuery("D = foreach C generate group as x, COUNT(B) as the_count;");
pigServer.registerQuery("store D into '" + Util.encodeEscape(out.getAbsolutePath()) + "';");
// getExamples(null) derives example data for the whole batched script.
Map<Operator, DataBag> derivedData = pigServer.getExamples(null);
assertNotNull(derivedData);
}
示例9: testStreamingStderrLogsShouldNotBePersistedByDefault
import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testStreamingStderrLogsShouldNotBePersistedByDefault() throws Exception {
Util.createInputFile(cluster, "mydummyinput.txt", new String[] { "dummy"});
PigServer pig = new PigServer(ExecType.MAPREDUCE,cluster.getProperties());
pig.setBatchOn();
pig.registerQuery("define mycmd `echo dummy` ;");
pig.registerQuery("A = load 'mydummyinput.txt' as (f1:chararray);");
pig.registerQuery("B = stream A through mycmd;");
pig.registerQuery("store B into 'output_dir_001' ;");
pig.executeBatch();
Assert.assertTrue(Util.exists(pig.getPigContext(), "output_dir_001"));
Assert.assertFalse(Util.exists(pig.getPigContext(), "output_dir_001/_logs/mycmd"));
}
示例10: setup
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Builds a local-mode PigServer that runs the given script with the supplied
 * endpoint substituted for the script's ENDPOINT parameter.
 *
 * @param script   path of the Pig script to register
 * @param endpoint value substituted for the ENDPOINT script parameter
 * @return a PigServer with batch mode enabled and the script registered
 * @throws Exception if the server cannot be created or the script fails to parse
 */
private PigServer setup(String script, String endpoint) throws Exception {
    Configuration conf = new HdfsConfiguration();
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", endpoint);
    PigServer server = new PigServer(ExecType.LOCAL, conf);
    server.setBatchOn();
    server.registerScript(script, substitutions);
    return server;
}
示例11: testScalarAliasesSplitClause
import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testScalarAliasesSplitClause() throws Exception{
Util.resetStateForExecModeSwitch();
pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
String[] input = {
"1\t5",
"2\t10",
"3\t20"
};
// Test the use of scalars in expressions
String inputPath = "table_testScalarAliasesSplitClause";
String output = "table_testScalarAliasesSplitClauseDir";
Util.createInputFile(cluster, inputPath, input);
// Test in script mode
pigServer.setBatchOn();
pigServer.registerQuery("A = LOAD '"+inputPath+"' as (a0: long, a1: double);");
pigServer.registerQuery("B = group A all;");
pigServer.registerQuery("C = foreach B generate COUNT(A) as count;");
pigServer.registerQuery("split A into Y if (2 * C.count) < a1, X if a1 == 5;");
pigServer.registerQuery("Store Y into '"+output+"';");
pigServer.executeBatch();
// Check output
pigServer.registerQuery("Z = LOAD '"+output+"' as (a0: int, a1: double);");
Iterator<Tuple> iter = pigServer.openIterator("Z");
// Y gets only last 2 elements
Tuple t = iter.next();
assertTrue(t.toString().equals("(2,10.0)"));
t = iter.next();
assertTrue(t.toString().equals("(3,20.0)"));
assertFalse(iter.hasNext());
Util.deleteFile(cluster, output);
}
示例12: registerQuery
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Registers a Pig Latin fragment on the given server by feeding it through a
 * non-interactive Grunt parser with batch mode enabled.
 *
 * @param pigServer server the parsed statements are registered on
 * @param pl        Pig Latin text to parse
 * @throws IOException if the fragment cannot be parsed
 */
private void registerQuery(PigServer pigServer, String pl) throws IOException {
    GruntParser parser = new GruntParser(new StringReader(pl));
    parser.setInteractive(false);
    parser.setParams(pigServer);
    pigServer.setBatchOn();
    try {
        parser.parseStopOnError(true);
    } catch (ParseException e) {
        // Surface parser failures as IOException, preserving the original cause.
        throw new IOException("Failed to parse query: " + pl, e);
    }
}
示例13: testMultiStorage
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * The actual method that runs the test in local or cluster mode: registers the
 * given queries as one batch, executes it, and verifies the stored output.
 *
 * @param mode    whether to run against the local or the cluster PigServer
 * @param outPath output location whose contents are verified after the batch
 * @param queries Pig Latin statements to register and execute
 * @throws IOException if query registration or batch execution fails
 */
private void testMultiStorage( Mode mode, String outPath,
String... queries) throws IOException {
// Pick the server instance matching the requested execution mode.
PigServer pigServer = (Mode.local == mode) ? this.pigServerLocal : this.pigServer;
pigServer.setBatchOn();
for (String query : queries) {
pigServer.registerQuery(query);
}
pigServer.executeBatch();
verifyResults(mode, outPath);
}
示例14: testPredicatePushdown
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks ParquetLoader predicate pushdown: 10 rows are written to Parquet, then
 * re-read with a filter matching 2 of them; the input statistics must show that
 * only 2 records were read.
 */
@Test
public void testPredicatePushdown() throws Exception {
Configuration conf = new Configuration();
// Opt in to predicate filter pushdown in the loader.
conf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);
PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
pigServer.setValidateEachStatement(true);
String out = "target/out";
String out2 = "target/out2";
int rows = 10;
// Seed the mock storage with 10 rows of (int, double, long, chararray).
Data data = Storage.resetData(pigServer);
List<Tuple> list = new ArrayList<Tuple>();
for (int i = 0; i < rows; i++) {
list.add(Storage.tuple(i, i*1.0, i*2L, "v"+i));
}
data.set("in", "c1:int, c2:double, c3:long, c4:chararray", list);
pigServer.setBatchOn();
pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
pigServer.deleteFile(out);
// First batch: materialize the mock data as a Parquet file at 'out'.
pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
pigServer.executeBatch();
pigServer.deleteFile(out2);
// Second batch: re-read the Parquet file with a filter eligible for pushdown.
pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
pigServer.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
pigServer.registerQuery("STORE C into '" + out2 +"' using mock.Storage();");
List<ExecJob> jobs = pigServer.executeBatch();
// With pushdown active, the job stats must report only the 2 matching records
// as read from input (not all 10).
long recordsRead = jobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
assertEquals(2, recordsRead);
}
示例15: testUnionOnSchemaAdditionalColumnsWithImplicitSplit
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Union-onschema of three relations with increasing column counts, where the
 * third relation is stored twice so batch mode inserts an implicit split
 * (exercising SchemaResetter via ImplicitSplitInsertVisitor).
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
PigServer pig = new PigServer(ExecType.LOCAL);
Data data = Storage.resetData(pig);
// Use batch to force multiple outputs from relation l3. This causes
// ImplicitSplitInsertVisitor to call SchemaResetter.
pig.setBatchOn();
String query =
" l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);"
+ "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);"
+ "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);"
+ "u = union onschema l1, l2, l3;"
+ "store u into 'out1' using mock.Storage;"
+ "store l3 into 'out2' using mock.Storage;"
;
Util.registerMultiLineQuery(pig, query);
pig.executeBatch();
List<Tuple> list1 = data.get("out1");
List<Tuple> list2 = data.get("out2");
// Expected union result: rows from the narrower relations padded with nulls
// up to the widest (4-column) schema.
List<Tuple> expectedRes =
Util.getTuplesFromConstantTupleStrings(
new String[] {
"(1,2,null,null)",
"(5,3,null,null)",
"(1,2,3,null)",
"(4,5,6,null)",
});
Util.checkQueryOutputsAfterSort(list1, expectedRes);
// l3 loads the empty input file, so its dedicated store produces no rows.
assertEquals(0, list2.size());
}