当前位置: 首页>>代码示例>>Java>>正文


Java PigServer.executeBatch方法代码示例

本文整理汇总了Java中org.apache.pig.PigServer.executeBatch方法的典型用法代码示例。如果您正苦于以下问题:Java PigServer.executeBatch方法的具体用法?Java PigServer.executeBatch怎么用?Java PigServer.executeBatch使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.pig.PigServer的用法示例。


在下文中一共展示了PigServer.executeBatch方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: runQuery

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV data file and stores it through piggybank's
 * MultiStorage with the given compression type, running both statements
 * as a single Pig batch.
 *
 * @param outputPath      directory the MultiStorage STORE writes into
 * @param compressionType compression extension handed to MultiStorage
 *                        (e.g. "gz" or "bz2")
 * @throws Exception if building or executing the batch fails
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception {
   // NOTE: the original declared "throws Exception, ExecException,
   // IOException, FrontendException"; the last three are subclasses of
   // Exception, so the extra declarations were redundant and are dropped.

   // create a data file
   String filename = TestHelper.createTempFile(data, "");
   PigServer pig = new PigServer(LOCAL);
   // Double backslashes so Windows-style paths survive Pig's string parsing.
   filename = filename.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");
   String query = "A = LOAD '" + Util.encodeEscape(filename)
         + "' USING PigStorage(',') as (a,b,c);";

   String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
         + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
         + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";

   // Run Pig
   pig.setBatchOn();
   pig.registerQuery(query);
   pig.registerQuery(query2);

   pig.executeBatch();
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:23,代码来源:TestMultiStorageCompression.java

示例2: exec

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Executes one Pig script fragment in batch mode and returns its statistics.
 *
 * A fresh ScriptState is installed for the run; progress listeners attached
 * to the previous state are re-registered so callers keep receiving
 * notifications.
 *
 * @param query the Pig Latin text to run
 * @return statistics of the run, via {@code PigStats.get()}
 * @throws IOException if the script cannot be parsed
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);
    // Carry the listeners over from the ScriptState about to be replaced.
    List<PigProgressNotificationListener> savedListeners =
            ScriptState.get().getAllListeners();
    PigContext context = scriptContext.getPigContext();
    ScriptState freshState = context.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    for (PigProgressNotificationListener listener : savedListeners) {
        ScriptState.get().registerListener(listener);
    }
    PigServer server = new PigServer(scriptContext.getPigContext(), false);
    server.setBatchOn();
    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException e) {
        throw new IOException("Failed to parse script " + e.getMessage(), e);
    }
    server.executeBatch();
    return PigStats.get();
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:23,代码来源:BoundScript.java

示例3: testStreamingStderrLogsShouldNotBePersistedByDefault

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testStreamingStderrLogsShouldNotBePersistedByDefault() throws Exception {

    Util.createInputFile(cluster, "mydummyinput.txt", new String[] { "dummy"});

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.setBatchOn();

    // Stream the input through a trivial shell command and store the result.
    String[] script = {
            "define mycmd `echo dummy` ;",
            "A = load 'mydummyinput.txt' as (f1:chararray);",
            "B = stream A through mycmd;",
            "store B into 'output_dir_001' ;",
    };
    for (String statement : script) {
        pigServer.registerQuery(statement);
    }
    pigServer.executeBatch();

    // The output exists, but no stderr logs were persisted for the command.
    Assert.assertTrue(Util.exists(pigServer.getPigContext(), "output_dir_001"));
    Assert.assertFalse(Util.exists(pigServer.getPigContext(), "output_dir_001/_logs/mycmd"));

}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:19,代码来源:TestStreaming.java

示例4: testPositive

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Verify that the ASSERT operator passes rows through unchanged when the
 * asserted predicate holds for every input tuple.
 * @throws Exception
 */
@Test
public void testPositive() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data data = resetData(server);

    data.set("foo", tuple(1), tuple(2), tuple(3));

    server.setBatchOn();
    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery("ASSERT A BY i > 0;");
    server.registerQuery("STORE A INTO 'bar' USING mock.Storage();");

    server.executeBatch();

    // The always-true assertion must let all three tuples through, in order.
    List<Tuple> out = data.get("bar");
    assertEquals(3, out.size());
    for (int i = 0; i < 3; i++) {
        assertEquals(tuple(i + 1), out.get(i));
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:29,代码来源:TestAssert.java

示例5: runQuery

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV data file and stores it through piggybank's
 * MultiStorage with the given compression type, running both statements
 * as a single Pig batch.
 *
 * @param outputPath      directory the MultiStorage STORE writes into
 * @param compressionType compression extension handed to MultiStorage
 *                        (e.g. "gz" or "bz2")
 * @throws Exception if building or executing the batch fails
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception {
   // NOTE: the original declared "throws Exception, ExecException,
   // IOException, FrontendException"; the last three are subclasses of
   // Exception, so the extra declarations were redundant and are dropped.

   // create a data file
   String filename = TestHelper.createTempFile(data, "");
   PigServer pig = new PigServer(LOCAL);
   // Double backslashes so Windows-style paths survive Pig's string parsing.
   filename = filename.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");
   String query = "A = LOAD '" + Util.encodeEscape(filename)
         + "' USING PigStorage(',') as (a,b,c);";

   String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
         + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
         + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";

   // Run Pig
   pig.setBatchOn();
   pig.registerQuery(query);
   pig.registerQuery(query2);

   pig.executeBatch();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:23,代码来源:TestMultiStorageCompression.java

示例6: testArrayWithSnappyCompression

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testArrayWithSnappyCompression() throws IOException {
    String output = outbasedir + "testArrayWithSnappyCompression";
    String expected = basedir + "expected_testArrayDefault.avro";

    deleteDirectory(new File(output));

    // Request snappy both at the MapReduce layer and as the Avro codec.
    Properties props = new Properties();
    props.setProperty(MRConfiguration.OUTPUT_COMPRESS, "true");
    props.setProperty(MRConfiguration.OUTPUT_COMPRESSION_CODEC,
            "org.apache.hadoop.io.compress.SnappyCodec");
    props.setProperty("avro.output.codec", "snappy");

    PigServer server = new PigServer(ExecType.LOCAL, props);
    server.setBatchOn();
    // NOTE(review): the " '" below embeds a trailing space inside the LOAD
    // path; it is preserved verbatim from the original — confirm intentional.
    server.registerQuery(" in = LOAD '" + Util.encodeEscape(testArrayFile)
            + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();");
    server.registerQuery(" STORE in INTO '" + output
            + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();");
    server.executeBatch();

    verifyResults(output, expected, "snappy");
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:24,代码来源:TestAvroStorage.java

示例7: assertAllDocumentsOk

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs the given feeding script and asserts that every document was fed
 * successfully: for each job in the stats graph, 10 documents were sent,
 * 10 came back ok, and none failed.
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer pig = setup(script, conf);
    List<ExecJob> jobs = pig.executeBatch();
    PigStats stats = jobs.get(0).getStatistics();
    for (JobStats jobStats : stats.getJobGraph()) {
        Counters hadoopCounters = ((MRJobStats) jobStats).getHadoopCounters();
        assertNotNull(hadoopCounters);
        VespaCounters counters = VespaCounters.get(hadoopCounters);
        assertEquals(10, counters.getDocumentsSent());
        assertEquals(10, counters.getDocumentsOk());
        assertEquals(0, counters.getDocumentsFailed());
    }
}
 
开发者ID:vespa-engine,项目名称:vespa,代码行数:14,代码来源:VespaStorageTest.java

示例8: testPredicatePushdown

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testPredicatePushdown() throws Exception {
  // Enable pushdown so the FILTER below is evaluated inside the loader.
  Configuration conf = new Configuration();
  conf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);

  PigServer server = new PigServer(ExecType.LOCAL, conf);
  server.setValidateEachStatement(true);

  String out = "target/out";
  String out2 = "target/out2";
  int numRows = 10;

  // Seed mock storage with rows of the form (i, i*1.0, i*2L, "v"+i).
  Data data = Storage.resetData(server);
  List<Tuple> tuples = new ArrayList<Tuple>();
  for (int i = 0; i < numRows; i++) {
    tuples.add(Storage.tuple(i, i * 1.0, i * 2L, "v" + i));
  }
  data.set("in", "c1:int, c2:double, c3:long, c4:chararray", tuples);

  // First batch: write the rows out as Parquet.
  server.setBatchOn();
  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.deleteFile(out);
  server.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  server.executeBatch();

  // Second batch: read back through ParquetLoader with a filter on c1.
  server.deleteFile(out2);
  server.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
  server.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
  server.registerQuery("STORE C into '" + out2 + "' using mock.Storage();");
  List<ExecJob> jobs = server.executeBatch();

  // Pushdown means only the two matching records are read from the file.
  long recordsRead = jobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
  assertEquals(2, recordsRead);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:34,代码来源:TestParquetLoader.java

示例9: testExplicitSPLIT

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testExplicitSPLIT() throws Exception {
    PigServer server = newPigServer();
    Data data = Storage.resetData(server);
    data.set("input",
            tuple("1", 2, "foo"),
            tuple("2", 3, "bar"),
            tuple("2", 1, "bar"),
            tuple("1", 4, "foo"));

    // SPLIT routes key "1" rows to B/output1 and key "2" rows to C/output2.
    server.setBatchOn();
    for (String statement : new String[] {
            "A = LOAD 'input' using mock.Storage;",
            "SPLIT A INTO B IF $0 == '1', C IF $0 == '2';",
            "STORE B INTO 'output1' using mock.Storage;",
            "STORE C INTO 'output2' using mock.Storage;",
    }) {
        server.registerQuery(statement);
    }
    server.executeBatch();

    List<Tuple> expectedOnes = Arrays.asList(
            tuple("1", 2, "foo"),
            tuple("1", 4, "foo"));
    List<Tuple> expectedTwos = Arrays.asList(
            tuple("2", 3, "bar"),
            tuple("2", 1, "bar"));
    assertEquals(expectedOnes, sortByIndex(data.get("output1"), 0));
    assertEquals(expectedTwos, sortByIndex(data.get("output2"), 0));

    server.shutdown();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:33,代码来源:TestSpark.java

示例10: testMapOnlyMultiQueryStores

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testMapOnlyMultiQueryStores() throws Exception {
    // Generate MAX random integers as the shared input file.
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    for (int i = 0; i < MAX; i++) {
        pw.println(r.nextInt(100));
    }
    pw.close();

    // One load feeding two complementary filters → a map-only multi-store job.
    PigServer server = new PigServer(ExecType.MAPREDUCE,
            cluster.getProperties());
    server.setBatchOn();
    server.registerQuery("a = load '" + file + "';");
    server.registerQuery("b = filter a by $0 > 50;");
    server.registerQuery("c = filter a by $0 <= 50;");
    server.registerQuery("store b into '/tmp/outout1';");
    server.registerQuery("store c into '/tmp/outout2';");
    List<ExecJob> jobs = server.executeBatch();
    PigStats stats = jobs.get(0).getStatistics();
    assertTrue(stats.getOutputLocations().size() == 2);

    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("/tmp/outout1"), true);
    cluster.getFileSystem().delete(new Path("/tmp/outout2"), true);

    // The per-store counters across both outputs must add up to MAX,
    // since the two filters partition the input exactly.
    MRJobStats js = (MRJobStats) stats.getJobGraph().getSinks().get(0);
    long total = 0;
    for (Long val : js.getMultiStoreCounters().values()) {
        total += val;
    }
    assertEquals(MAX, total);
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:36,代码来源:TestCounters.java

示例11: testImplicitSPLIT

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testImplicitSPLIT() throws Exception {
    PigServer server = newPigServer();
    Data data = Storage.resetData(server);
    data.set("input",
            tuple("1", 2, "foo"),
            tuple("2", 3, "bar"),
            tuple("2", 1, "bar"),
            tuple("1", 4, "foo"));

    // Two FILTERs over the same relation force an implicit split of A.
    server.setBatchOn();
    for (String statement : new String[] {
            "A = LOAD 'input' using mock.Storage;",
            "B = FILTER A BY $0 == '1';",
            "C = FILTER A BY $0 == '2';",
            "STORE B INTO 'output1' using mock.Storage;",
            "STORE C INTO 'output2' using mock.Storage;",
    }) {
        server.registerQuery(statement);
    }
    server.executeBatch();

    List<Tuple> expectedOnes = Arrays.asList(
            tuple("1", 2, "foo"),
            tuple("1", 4, "foo"));
    List<Tuple> expectedTwos = Arrays.asList(
            tuple("2", 3, "bar"),
            tuple("2", 1, "bar"));
    assertEquals(expectedOnes, sortByIndex(data.get("output1"), 0));
    assertEquals(expectedTwos, sortByIndex(data.get("output2"), 0));

    server.shutdown();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:34,代码来源:TestSpark.java

示例12: testIgnoreCache

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs query1, then query2 against freshly reset input data, and checks that
 * the second run's output differs from the first — i.e. the result was
 * recomputed rather than served from a stale cache.
 */
public void testIgnoreCache(String query1, String query2) throws Exception {
    PigServer server = newPigServer();

    Data data = Storage.resetData(server);
    data.set("input", tuple("test1"), tuple("test2"));

    server.setBatchOn();
    server.registerQuery(query1);
    server.executeBatch();

    List<Tuple> firstOutput = data.get("output");
    LOG.debug("After first query: " + firstOutput);

    // Swap in completely new input before the second run.
    data = Storage.resetData(server);
    data.set("input", tuple("test3"), tuple("test4"));
    server.registerQuery(query2);
    server.executeBatch();

    LOG.debug("After second query: " + data.get("output"));

    Assert.assertFalse(firstOutput.equals(data.get("output")));

    server.shutdown();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:31,代码来源:TestSpark.java

示例13: testUnionOnSchemaAdditionalColumnsWithImplicitSplit

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pig);

    // Use batch to force multiple outputs from relation l3. This causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    pig.setBatchOn();

    String script = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);"
            + "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);"
            + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);"
            + "u = union onschema l1, l2, l3;"
            + "store u into 'out1' using mock.Storage;"
            + "store l3 into 'out2' using mock.Storage;";

    Util.registerMultiLineQuery(pig, script);

    pig.executeBatch();

    List<Tuple> unionOut = data.get("out1");
    List<Tuple> emptyOut = data.get("out2");

    // union onschema pads missing trailing columns with nulls; l3 is empty
    // so it contributes nothing to out1 and out2 stays empty.
    List<Tuple> expectedRes =
            Util.getTuplesFromConstantTupleStrings(
                    new String[] {
                            "(1,2,null,null)",
                            "(5,3,null,null)",
                            "(1,2,3,null)",
                            "(4,5,6,null)",
                    });

    Util.checkQueryOutputsAfterSort(unionOut, expectedRes);

    assertEquals(0, emptyOut.size());
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:40,代码来源:TestUnionOnSchema.java

示例14: testShellCommandOrder

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Verifies that a script's "sh" command runs after the preceding STORE:
 * the shell delete of the input file must not break the load/store that
 * precede it in the script.
 *
 * Fix over the original: the two PrintWriters leaked on any exception and
 * the FileInputStream was never closed — all three are now managed with
 * try-with-resources.
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    // The shell delete command differs per OS.
    String strRemove = Util.WINDOWS ? "del" : "rm";

    File inputFile = File.createTempFile("testInputFile", ".txt");
    try (PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile))) {
        pwInput.println("1");
    }

    File inputScript = File.createTempFile("testInputScript", "");
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    // Only the path is needed; the store below must create the file itself.
    outputFile.delete();
    try (PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript))) {
        pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
        pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
        pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    }

    try (InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile())) {
        server.setBatchOn();
        server.registerScript(inputStream);
        List<ExecJob> execJobs = server.executeBatch();
        assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:32,代码来源:TestGrunt.java

示例15: testBzipStoreInMultiQuery2

import org.apache.pig.PigServer; //导入方法依赖的package包/类
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, new String[] {
            "1\t2\r3\t4"
    });

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    // Turn on output compression with the BZip2 codec for this session.
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext.getProperties().setProperty("output.compression.codec",
            "org.apache.hadoop.io.compress.BZip2Codec");

    // One load, two stores (compressed and plain) in a single batch.
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pig.getPigContext().getProperties()));

    // Both stores must have produced non-empty output files.
    for (String dir : new String[] { "output2", "output2.bz2" }) {
        FileStatus[] outputFiles = fs.listStatus(new Path(dir),
                Util.getSuccessMarkerPathFilter());
        assertTrue(outputFiles[0].getLen() > 0);
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:31,代码来源:TestBZip.java


注:本文中的org.apache.pig.PigServer.executeBatch方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。