本文整理汇总了Java中org.apache.pig.PigServer.executeBatch方法的典型用法代码示例。如果您正苦于以下问题：Java PigServer.executeBatch方法的具体用法？Java PigServer.executeBatch怎么用？Java PigServer.executeBatch使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.PigServer的用法示例。
在下文中一共展示了PigServer.executeBatch方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runQuery
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a comma-delimited temp file and stores it through MultiStorage in a single batch.
 *
 * @param outputPath      used both as the STORE target and as MultiStorage's first argument
 * @param compressionType passed as MultiStorage's third constructor argument
 * @throws Exception if creating the temp file or any Pig call fails
 */
private void runQuery(String outputPath, String compressionType)
        throws Exception, ExecException, IOException, FrontendException {
    // Write the fixture rows to a temporary file.
    String inputFile = TestHelper.createTempFile(data, "");
    PigServer server = new PigServer(LOCAL);

    // Double every backslash in the path and in patternString (declared outside this method).
    inputFile = inputFile.replace("\\", "\\\\");
    patternString = patternString.replace("\\", "\\\\");

    StringBuilder load = new StringBuilder("A = LOAD '");
    load.append(Util.encodeEscape(inputFile));
    load.append("' USING PigStorage(',') as (a,b,c);");

    StringBuilder store = new StringBuilder("STORE A INTO '");
    store.append(Util.encodeEscape(outputPath));
    store.append("' USING org.apache.pig.piggybank.storage.MultiStorage('");
    store.append(Util.encodeEscape(outputPath));
    store.append("','0', '").append(compressionType).append("', '\\t');");

    // Register both statements, then run them as one batch.
    server.setBatchOn();
    server.registerQuery(load.toString());
    server.registerQuery(store.toString());
    server.executeBatch();
}
示例2: exec
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Parses and executes a Pig script in batch mode under a newly started ScriptState.
 *
 * @param query the Pig script text to run
 * @return the value of {@code PigStats.get()} once the batch has executed
 * @throws IOException if the script cannot be parsed (the ParseException is kept as cause)
 *                     or a Pig call raises one
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);

    // Capture the listeners reported by ScriptState.get() before ScriptState.start is called.
    List<PigProgressNotificationListener> capturedListeners = ScriptState.get().getAllListeners();

    PigContext context = scriptContext.getPigContext();
    ScriptState freshState = context.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    // Register the captured listeners on whatever ScriptState.get() returns now.
    for (PigProgressNotificationListener captured : capturedListeners) {
        ScriptState.get().registerListener(captured);
    }

    PigServer server = new PigServer(scriptContext.getPigContext(), false);
    server.setBatchOn();

    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException parseFailure) {
        throw new IOException("Failed to parse script " + parseFailure.getMessage(), parseFailure);
    }

    server.executeBatch();
    return PigStats.get();
}
示例3: testStreamingStderrLogsShouldNotBePersistedByDefault
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Streams a one-line input through a defined command, stores the result, and checks that
 * the output directory exists while no {@code _logs/mycmd} path is found beneath it.
 *
 * @throws Exception if input creation or any Pig call fails
 */
@Test
public void testStreamingStderrLogsShouldNotBePersistedByDefault() throws Exception {
    Util.createInputFile(cluster, "mydummyinput.txt", new String[] { "dummy"});
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    String[] statements = {
        "define mycmd `echo dummy` ;",
        "A = load 'mydummyinput.txt' as (f1:chararray);",
        "B = stream A through mycmd;",
        "store B into 'output_dir_001' ;"
    };

    server.setBatchOn();
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    server.executeBatch();

    boolean outputPresent = Util.exists(server.getPigContext(), "output_dir_001");
    Assert.assertTrue(outputPresent);
    boolean logsPresent = Util.exists(server.getPigContext(), "output_dir_001/_logs/mycmd");
    Assert.assertFalse(logsPresent);
}
示例4: testPositive
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Verifies that the ASSERT operator passes through rows that satisfy its condition:
 * three positive ints are loaded, asserted {@code i > 0}, stored, and read back in order.
 *
 * @throws Exception if any Pig call fails
 */
@Test
public void testPositive() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);
    mockData.set("foo", tuple(1), tuple(2), tuple(3));

    server.setBatchOn();
    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery("ASSERT A BY i > 0;");
    server.registerQuery("STORE A INTO 'bar' USING mock.Storage();");
    server.executeBatch();

    List<Tuple> stored = mockData.get("bar");
    assertEquals(3, stored.size());
    // Expect tuple(1), tuple(2), tuple(3) at positions 0, 1, 2.
    for (int position = 0; position < 3; position++) {
        assertEquals(tuple(position + 1), stored.get(position));
    }
}
示例5: runQuery
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a comma-delimited temp file and stores it through MultiStorage in a single batch.
 *
 * @param outputPath      used both as the STORE target and as MultiStorage's first argument
 * @param compressionType passed as MultiStorage's third constructor argument
 * @throws Exception if creating the temp file or any Pig call fails
 */
private void runQuery(String outputPath, String compressionType)
        throws Exception, ExecException, IOException, FrontendException {
    // Write the fixture rows to a temporary file.
    String tempInput = TestHelper.createTempFile(data, "");
    PigServer pigServer = new PigServer(LOCAL);

    // Double every backslash in the path and in patternString (declared outside this method).
    tempInput = tempInput.replace("\\", "\\\\");
    patternString = patternString.replace("\\", "\\\\");

    StringBuilder loadStatement = new StringBuilder("A = LOAD '");
    loadStatement.append(Util.encodeEscape(tempInput));
    loadStatement.append("' USING PigStorage(',') as (a,b,c);");

    StringBuilder storeStatement = new StringBuilder("STORE A INTO '");
    storeStatement.append(Util.encodeEscape(outputPath));
    storeStatement.append("' USING org.apache.pig.piggybank.storage.MultiStorage('");
    storeStatement.append(Util.encodeEscape(outputPath));
    storeStatement.append("','0', '").append(compressionType).append("', '\\t');");

    // Register both statements, then run them as one batch.
    pigServer.setBatchOn();
    pigServer.registerQuery(loadStatement.toString());
    pigServer.registerQuery(storeStatement.toString());
    pigServer.executeBatch();
}
示例6: testArrayWithSnappyCompression
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads an Avro file and stores it again with snappy output compression configured,
 * then hands the output to {@code verifyResults} with the "snappy" codec name.
 *
 * @throws IOException if directory cleanup or any Pig call fails
 */
@Test
public void testArrayWithSnappyCompression() throws IOException {
    String outputDir = outbasedir + "testArrayWithSnappyCompression";
    String expectedFile = basedir + "expected_testArrayDefault.avro";
    deleteDirectory(new File(outputDir));

    Properties compressionProps = new Properties();
    compressionProps.setProperty(MRConfiguration.OUTPUT_COMPRESS, "true");
    compressionProps.setProperty(MRConfiguration.OUTPUT_COMPRESSION_CODEC, "org.apache.hadoop.io.compress.SnappyCodec");
    compressionProps.setProperty("avro.output.codec", "snappy");

    PigServer server = new PigServer(ExecType.LOCAL, compressionProps);
    server.setBatchOn();

    // NOTE(review): the LOAD path literal has a space before its closing quote; kept as-is — confirm it is intended.
    String loadStatement = " in = LOAD '" + Util.encodeEscape(testArrayFile) + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();";
    String storeStatement = " STORE in INTO '" + outputDir + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();";
    server.registerQuery(loadStatement);
    server.registerQuery(storeStatement);

    server.executeBatch();
    verifyResults(outputDir, expectedFile, "snappy");
}
示例7: assertAllDocumentsOk
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Executes the prepared batch and asserts, for every job in the first result's job graph,
 * that the Vespa counters report 10 documents sent, 0 failed and 10 ok.
 *
 * @param script passed through to {@code setup}
 * @param conf   passed through to {@code setup}
 * @throws Exception if setup or batch execution fails
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer server = setup(script, conf);
    List<ExecJob> executed = server.executeBatch();
    PigStats batchStats = executed.get(0).getStatistics();

    for (JobStats job : batchStats.getJobGraph()) {
        MRJobStats mrJob = (MRJobStats) job;
        Counters rawCounters = mrJob.getHadoopCounters();
        assertNotNull(rawCounters);

        VespaCounters vespa = VespaCounters.get(rawCounters);
        assertEquals(10, vespa.getDocumentsSent());
        assertEquals(0, vespa.getDocumentsFailed());
        assertEquals(10, vespa.getDocumentsOk());
    }
}
示例8: testPredicatePushdown
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Writes ten mock rows to Parquet, reloads them with predicate-filter pushdown enabled,
 * filters on {@code c1 == 1 or c1 == 5}, and expects the first input stat of the second
 * batch to report 2 records.
 *
 * @throws Exception if any Pig call fails
 */
@Test
public void testPredicatePushdown() throws Exception {
    Configuration pushdownConf = new Configuration();
    pushdownConf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);
    PigServer server = new PigServer(ExecType.LOCAL, pushdownConf);
    server.setValidateEachStatement(true);

    final String parquetOut = "target/out";
    final String filteredOut = "target/out2";
    final int rowCount = 10;
    final String schema = "c1:int, c2:double, c3:long, c4:chararray";

    Data mockData = Storage.resetData(server);
    List<Tuple> inputRows = new ArrayList<Tuple>();
    int row = 0;
    while (row < rowCount) {
        inputRows.add(Storage.tuple(row, row*1.0, row*2L, "v"+row));
        row++;
    }
    mockData.set("in", schema, inputRows);

    // First batch: copy the mock rows into a Parquet output.
    server.setBatchOn();
    server.registerQuery("A = LOAD 'in' USING mock.Storage();");
    server.deleteFile(parquetOut);
    server.registerQuery("Store A into '" + parquetOut + "' using " + ParquetStorer.class.getName() + "();");
    server.executeBatch();

    // Second batch: reload from Parquet, filter, and store into mock storage.
    server.deleteFile(filteredOut);
    server.registerQuery("B = LOAD '" + parquetOut + "' using " + ParquetLoader.class.getName() + "('" + schema + "');");
    server.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
    server.registerQuery("STORE C into '" + filteredOut +"' using mock.Storage();");
    List<ExecJob> secondBatch = server.executeBatch();

    long recordsRead = secondBatch.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
    assertEquals(2, recordsRead);
}
示例9: testExplicitSPLIT
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Splits four mock rows with an explicit SPLIT on the first field, stores each branch,
 * and checks that 'output1' holds the "1" rows and 'output2' holds the "2" rows.
 *
 * @throws Exception if any Pig call fails
 */
@Test
public void testExplicitSPLIT() throws Exception {
    PigServer pigServer = newPigServer();
    try {
        Data data = Storage.resetData(pigServer);
        data.set("input",
                tuple("1", 2, "foo"),
                tuple("2", 3, "bar"),
                tuple("2", 1, "bar"),
                tuple("1", 4, "foo"));

        pigServer.setBatchOn();
        pigServer.registerQuery("A = LOAD 'input' using mock.Storage;");
        pigServer.registerQuery("SPLIT A INTO B IF $0 == '1', C IF $0 == '2';");
        pigServer.registerQuery("STORE B INTO 'output1' using mock.Storage;");
        pigServer.registerQuery("STORE C INTO 'output2' using mock.Storage;");
        pigServer.executeBatch();

        assertEquals(
                Arrays.asList(
                        tuple("1", 2, "foo"),
                        tuple("1", 4, "foo")
                ),
                sortByIndex(data.get("output1"), 0));
        assertEquals(
                Arrays.asList(
                        tuple("2", 3, "bar"),
                        tuple("2", 1, "bar")
                ),
                sortByIndex(data.get("output2"), 0));
    } finally {
        // Shut the server down even when a query or an assertion above fails.
        pigServer.shutdown();
    }
}
示例10: testMapOnlyMultiQueryStores
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Writes MAX random ints to an input file, filters them into two stores in one batch,
 * and checks that two output locations are reported and that the multi-store counters
 * of the first sink job sum to MAX.
 *
 * @throws Exception if file creation, any Pig call, or cluster cleanup fails
 */
@Test
public void testMapOnlyMultiQueryStores() throws Exception {
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    try {
        for (int i = 0; i < MAX; i++) {
            int t = r.nextInt(100);
            pw.println(t);
        }
    } finally {
        // Close the writer even if generating the input fails part-way.
        pw.close();
    }

    try {
        PigServer pigServer = new PigServer(ExecType.MAPREDUCE,
                cluster.getProperties());
        pigServer.setBatchOn();
        pigServer.registerQuery("a = load '" + file + "';");
        pigServer.registerQuery("b = filter a by $0 > 50;");
        pigServer.registerQuery("c = filter a by $0 <= 50;");
        pigServer.registerQuery("store b into '/tmp/outout1';");
        pigServer.registerQuery("store c into '/tmp/outout2';");

        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats stats = jobs.get(0).getStatistics();
        // assertEquals reports the actual size on failure, unlike assertTrue(x == 2).
        assertEquals(2, stats.getOutputLocations().size());

        MRJobStats js = (MRJobStats) stats.getJobGraph().getSinks().get(0);
        Map<String, Long> entry = js.getMultiStoreCounters();
        long counter = 0;
        for (Long val : entry.values()) {
            counter += val;
        }
        assertEquals(MAX, counter);
    } finally {
        // Remove the input and both outputs regardless of the test outcome, so a failed
        // assertion does not leave them behind on the cluster file system.
        cluster.getFileSystem().delete(new Path(file), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout1"), true);
        cluster.getFileSystem().delete(new Path("/tmp/outout2"), true);
    }
}
示例11: testImplicitSPLIT
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Derives two FILTER relations from the same load, stores both in one batch, and checks
 * that 'output1' holds the "1" rows and 'output2' holds the "2" rows.
 *
 * @throws Exception if any Pig call fails
 */
@Test
public void testImplicitSPLIT() throws Exception {
    PigServer pigServer = newPigServer();
    try {
        Data data = Storage.resetData(pigServer);
        data.set("input",
                tuple("1", 2, "foo"),
                tuple("2", 3, "bar"),
                tuple("2", 1, "bar"),
                tuple("1", 4, "foo"));

        pigServer.setBatchOn();
        pigServer.registerQuery("A = LOAD 'input' using mock.Storage;");
        pigServer.registerQuery("B = FILTER A BY $0 == '1';");
        pigServer.registerQuery("C = FILTER A BY $0 == '2';");
        pigServer.registerQuery("STORE B INTO 'output1' using mock.Storage;");
        pigServer.registerQuery("STORE C INTO 'output2' using mock.Storage;");
        pigServer.executeBatch();

        assertEquals(
                Arrays.asList(
                        tuple("1", 2, "foo"),
                        tuple("1", 4, "foo")
                ),
                sortByIndex(data.get("output1"), 0));
        assertEquals(
                Arrays.asList(
                        tuple("2", 3, "bar"),
                        tuple("2", 1, "bar")
                ),
                sortByIndex(data.get("output2"), 0));
    } finally {
        // Shut the server down even when a query or an assertion above fails.
        pigServer.shutdown();
    }
}
示例12: testIgnoreCache
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs {@code query1} against one mock input and {@code query2} against a different one,
 * then asserts that the contents of 'output' after the second run differ from the first.
 *
 * @param query1 statement registered and executed against the first input
 * @param query2 statement registered and executed against the second input
 * @throws Exception if any Pig call fails
 */
public void testIgnoreCache(String query1, String query2) throws Exception {
    PigServer pigServer = newPigServer();
    try {
        Data data = Storage.resetData(pigServer);
        data.set("input",
                tuple("test1"),
                tuple("test2"));

        pigServer.setBatchOn();
        pigServer.registerQuery(query1);
        pigServer.executeBatch();

        List<Tuple> originalOutput = data.get("output");
        LOG.debug("After first query: " + originalOutput);

        // Reset the mock storage and supply different rows for the second run.
        data = Storage.resetData(pigServer);
        data.set("input",
                tuple("test3"),
                tuple("test4"));

        pigServer.registerQuery(query2);
        pigServer.executeBatch();

        LOG.debug("After second query: " + data.get("output"));

        Assert.assertFalse(
                originalOutput.equals(
                        data.get("output")));
    } finally {
        // Shut the server down even when a query or the assertion above fails.
        pigServer.shutdown();
    }
}
示例13: testUnionOnSchemaAdditionalColumnsWithImplicitSplit
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Unions three loads with differing schemas via {@code union onschema}, stores both the
 * union and the third relation, and checks the union rows (null-padded) and that the
 * second output is empty.
 *
 * @throws IOException if any Pig call fails
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = Storage.resetData(server);

    // Use batch to force multiple outputs from relation l3. This causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    server.setBatchOn();

    StringBuilder script = new StringBuilder();
    script.append(" l1 = load '").append(INP_FILE_2NUMS).append("' as (i : int, j: int);");
    script.append("l2 = load '").append(INP_FILE_3NUMS).append("' as (i : int, j : int, k : int);");
    script.append("l3 = load '").append(INP_FILE_EMPTY).append("' as (i : int, j : int, k : int, l :int);");
    script.append("u = union onschema l1, l2, l3;");
    script.append("store u into 'out1' using mock.Storage;");
    script.append("store l3 into 'out2' using mock.Storage;");

    Util.registerMultiLineQuery(server, script.toString());
    server.executeBatch();

    List<Tuple> unionRows = mockData.get("out1");
    List<Tuple> thirdRelationRows = mockData.get("out2");

    String[] expectedLiterals = {
        "(1,2,null,null)",
        "(5,3,null,null)",
        "(1,2,3,null)",
        "(4,5,6,null)",
    };
    List<Tuple> expectedRows = Util.getTuplesFromConstantTupleStrings(expectedLiterals);

    Util.checkQueryOutputsAfterSort(unionRows, expectedRows);
    assertEquals(0, thirdRelationRows.size());
}
示例14: testShellCommandOrder
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Writes a script that loads a temp file, stores it, and then removes the input via an
 * {@code sh} command; registers the script from a stream and asserts that the first job
 * of the batch completed.
 *
 * @throws Throwable if file handling or any Pig call fails
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    // Pick the delete command according to Util.WINDOWS.
    String strRemove = "rm";
    if (Util.WINDOWS) {
        strRemove = "del";
    }

    File inputFile = File.createTempFile("testInputFile", ".txt");
    PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile));
    try {
        pwInput.println("1");
    } finally {
        // Close the writer even if writing fails.
        pwInput.close();
    }

    File inputScript = File.createTempFile("testInputScript", "");
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    // Only the generated path is needed; the file itself is removed before the store runs.
    outputFile.delete();

    PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript));
    try {
        pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
        pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
        pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    } finally {
        pwScript.close();
    }

    InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile());
    List<ExecJob> execJobs;
    try {
        server.setBatchOn();
        server.registerScript(inputStream);
        execJobs = server.executeBatch();
    } finally {
        // The original never closed this stream; release the file handle in all cases.
        inputStream.close();
    }

    assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
}
示例15: testBzipStoreInMultiQuery2
import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Stores one relation into both 'output2.bz2' and 'output2' in a single batch with
 * output compression properties set, then checks that the first listed file of each
 * output has a non-zero length.
 *
 * @throws Exception if input creation, any Pig call, or file-system access fails
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String inputFileName = "input2.txt";
    String[] inputData = { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer server = new PigServer(cluster.getExecType(), properties);
    PigContext context = server.getPigContext();
    context.getProperties().setProperty( "output.compression.enabled", "true" );
    context.getProperties().setProperty( "output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec" );

    server.setBatchOn();
    server.registerQuery("a = load '" + inputFileName + "';");
    server.registerQuery("store a into 'output2.bz2';");
    server.registerQuery("store a into 'output2';");
    server.executeBatch();

    FileSystem fileSystem = FileSystem.get(
            ConfigurationUtil.toConfiguration(server.getPigContext().getProperties()));

    // Check 'output2' first, then 'output2.bz2', using the success-marker path filter.
    for (String outputDir : new String[] { "output2", "output2.bz2" }) {
        FileStatus[] listed = fileSystem.listStatus(new Path(outputDir),
                Util.getSuccessMarkerPathFilter());
        assertTrue(listed[0].getLen() > 0);
    }
}