当前位置: 首页>>代码示例>>Java>>正文


Java PigServer.setBatchOn方法代码示例

本文整理汇总了Java中org.apache.pig.PigServer.setBatchOn方法的典型用法代码示例。如果您正苦于以下问题:Java PigServer.setBatchOn方法的具体用法?Java PigServer.setBatchOn怎么用?Java PigServer.setBatchOn使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.pig.PigServer的用法示例。


在下文中一共展示了PigServer.setBatchOn方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setup

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Builds a local-mode PigServer in batch mode with the given script registered.
 * Dry-run and a dummy endpoint are forced via the configuration so no real
 * Vespa endpoint is ever contacted; the ENDPOINT parameter substitution is
 * likewise a placeholder.
 */
private PigServer setup(String script, Configuration conf) throws Exception {
    Configuration config = (conf == null) ? new HdfsConfiguration() : conf;
    config.setIfUnset(VespaConfiguration.DRYRUN, "true");
    config.setIfUnset(VespaConfiguration.ENDPOINT, "dummy-endpoint");

    // Parameter substitutions - can also be set by configuration
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", "endpoint-does-not-matter-in-dryrun,another-endpoint-that-does-not-matter");

    PigServer pigServer = new PigServer(ExecType.LOCAL, config);
    pigServer.setBatchOn();
    pigServer.registerScript(script, substitutions);

    return pigServer;
}
 
开发者ID:vespa-engine,项目名称:vespa,代码行数:18,代码来源:VespaStorageTest.java

示例2: testNumerOfColumnsWhenDatePartitionedFiles

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * A date-partitioned load should expose the three declared data columns plus
 * the date partition column, i.e. four fields per tuple.
 */
@Test
public void testNumerOfColumnsWhenDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";

    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");
    Iterator<Tuple> result = server.openIterator("a");

    // Use hasNext() rather than relying on next() returning null when
    // exhausted: the java.util.Iterator contract says next() should throw
    // NoSuchElementException at the end, so the null-check loop depended on
    // a non-standard iterator implementation.
    while (result.hasNext()) {
        Tuple t = result.next();
        // 3 data columns + 1 date partition column.
        Assert.assertEquals(4, t.size());
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:26,代码来源:TestHiveColumnarLoader.java

示例3: runQuery

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV temp file and stores it through MultiStorage with the
 * given compression type, running both statements as a single Pig batch.
 *
 * @param outputPath      directory the MultiStorage STORE writes into
 * @param compressionType codec name handed to MultiStorage (e.g. "gz", "bz2")
 * @throws Exception if temp-file creation, query registration or batch
 *                   execution fails (redundant narrower throws removed:
 *                   ExecException, IOException and FrontendException are all
 *                   subsumed by Exception)
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception {

   // Create a temporary input data file for the LOAD statement below.
   String filename = TestHelper.createTempFile(data, "");
   PigServer pig = new PigServer(LOCAL);
   // Escape Windows-style backslashes so they survive Pig's string parsing.
   filename = filename.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");

   String query = "A = LOAD '" + Util.encodeEscape(filename)
         + "' USING PigStorage(',') as (a,b,c);";

   String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
         + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
         + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";

   // Run both statements as one batch.
   pig.setBatchOn();
   pig.registerQuery(query);
   pig.registerQuery(query2);

   pig.executeBatch();
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:23,代码来源:TestMultiStorageCompression.java

示例4: test1DayDatePartitionedFiles

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Restricting the date range to a single day (startingDate:startingDate)
 * should load exactly the 50 rows of that partition.
 */
@Test
public void test1DayDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + startingDate + "')";

    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");
    Iterator<Tuple> result = server.openIterator("a");

    // hasNext() honors the java.util.Iterator contract; the previous
    // while ((result.next()) != null) loop relied on next() returning null
    // past the end instead of throwing NoSuchElementException.
    while (result.hasNext()) {
        result.next();
        count++;
    }

    Assert.assertEquals(50, count);
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:25,代码来源:TestHiveColumnarLoader.java

示例5: runQuery

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loads a generated CSV temp file and stores it through MultiStorage with the
 * given compression type, running both statements as a single Pig batch.
 *
 * @param outputPath      directory the MultiStorage STORE writes into
 * @param compressionType codec name handed to MultiStorage (e.g. "gz", "bz2")
 * @throws Exception if temp-file creation, query registration or batch
 *                   execution fails (redundant narrower throws removed:
 *                   ExecException, IOException and FrontendException are all
 *                   subsumed by Exception)
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception {

   // Create a temporary input data file for the LOAD statement below.
   String filename = TestHelper.createTempFile(data, "");
   PigServer pig = new PigServer(LOCAL);
   // Escape Windows-style backslashes so they survive Pig's string parsing.
   filename = filename.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");

   String query = "A = LOAD '" + Util.encodeEscape(filename)
         + "' USING PigStorage(',') as (a,b,c);";

   String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
         + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
         + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";

   // Run both statements as one batch.
   pig.setBatchOn();
   pig.registerQuery(query);
   pig.registerQuery(query2);

   pig.executeBatch();
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:23,代码来源:TestMultiStorageCompression.java

示例6: testArrayWithSnappyCompression

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Stores an Avro array with Snappy compression enabled and verifies the
 * written output against the expected file.
 */
@Test
public void testArrayWithSnappyCompression() throws IOException {
    String output = outbasedir + "testArrayWithSnappyCompression";
    String expected = basedir + "expected_testArrayDefault.avro";

    deleteDirectory(new File(output));

    // Enable Snappy at both the MapReduce and the Avro level.
    Properties props = new Properties();
    props.setProperty("mapred.output.compress", "true");
    props.setProperty("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    props.setProperty("avro.output.codec", "snappy");

    PigServer pigServer = new PigServer(ExecType.LOCAL, props);
    pigServer.setBatchOn();
    pigServer.registerQuery(" in = LOAD '" + Util.encodeEscape(testArrayFile) + " ' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();");
    pigServer.registerQuery(" STORE in INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();");
    pigServer.executeBatch();

    verifyResults(output, expected, "snappy");
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:24,代码来源:TestAvroStorage.java

示例7: testReadingSingleFileNoProjections

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Loading a single unpartitioned file with no projections should yield all
 * three declared columns, each of type chararray.
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString
            + ";");

    Iterator<Tuple> result = server.openIterator("a");

    int count = 0;
    // hasNext() honors the java.util.Iterator contract; the previous
    // while ((t = result.next()) != null) loop relied on next() returning
    // null past the end instead of throwing NoSuchElementException.
    while (result.hasNext()) {
        Tuple t = result.next();
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:27,代码来源:TestHiveColumnarLoader.java

示例8: testFilterGroupCountStore

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs a filter -> group -> count -> store pipeline and checks that example
 * generation produces derived data for it.
 */
@Test
public void testFilterGroupCountStore() throws Exception {
    File out = File.createTempFile("testFilterGroupCountStoreOutput", "");
    out.deleteOnExit();
    out.delete();

    PigServer pigServer = new PigServer(pigContext);
    pigServer.setBatchOn();
    String[] statements = {
            "A = load " + A.toString() + " as (x, y);",
            "B = filter A by x < 5;",
            "C = group B by x;",
            "D = foreach C generate group as x, COUNT(B) as the_count;",
            "store D into '" + Util.encodeEscape(out.getAbsolutePath()) + "';"
    };
    for (String statement : statements) {
        pigServer.registerQuery(statement);
    }

    Map<Operator, DataBag> derivedData = pigServer.getExamples(null);
    assertNotNull(derivedData);
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:18,代码来源:TestExampleGenerator.java

示例9: testStreamingStderrLogsShouldNotBePersistedByDefault

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Verifies that stderr logs of a streaming command are not persisted to the
 * job's _logs directory unless explicitly requested.
 */
@Test
public void testStreamingStderrLogsShouldNotBePersistedByDefault() throws Exception {

    Util.createInputFile(cluster, "mydummyinput.txt", new String[] { "dummy"});

    PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pig.setBatchOn();

    String[] script = {
            "define mycmd `echo dummy` ;",
            "A = load 'mydummyinput.txt' as (f1:chararray);",
            "B = stream A through mycmd;",
            "store B into 'output_dir_001' ;"
    };
    for (String statement : script) {
        pig.registerQuery(statement);
    }
    pig.executeBatch();

    // The job output must exist, but no stderr log directory for the command.
    Assert.assertTrue(Util.exists(pig.getPigContext(), "output_dir_001"));
    Assert.assertFalse(Util.exists(pig.getPigContext(), "output_dir_001/_logs/mycmd"));

}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:19,代码来源:TestStreaming.java

示例10: setup

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Builds a local-mode, batch-enabled PigServer with the given script
 * registered, substituting the ENDPOINT parameter with the supplied value.
 */
private PigServer setup(String script, String endpoint) throws Exception {
    Map<String, String> substitutions = new HashMap<>();
    substitutions.put("ENDPOINT", endpoint);

    PigServer pigServer = new PigServer(ExecType.LOCAL, new HdfsConfiguration());
    pigServer.setBatchOn();
    pigServer.registerScript(script, substitutions);

    return pigServer;
}
 
开发者ID:vespa-engine,项目名称:vespa,代码行数:12,代码来源:VespaQueryTest.java

示例11: testScalarAliasesSplitClause

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Exercises scalar aliases used inside a SPLIT clause expression, running the
 * statements in (batch) script mode and verifying the stored output.
 */
@Test
public void testScalarAliasesSplitClause() throws Exception{
    Util.resetStateForExecModeSwitch();
    pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    String[] rows = {
            "1\t5",
            "2\t10",
            "3\t20"
    };

    // Test the use of scalars in expressions
    String inputPath = "table_testScalarAliasesSplitClause";
    String output = "table_testScalarAliasesSplitClauseDir";
    Util.createInputFile(cluster, inputPath, rows);

    // Test in script mode
    pigServer.setBatchOn();
    String[] script = {
            "A = LOAD '" + inputPath + "' as (a0: long, a1: double);",
            "B = group A all;",
            "C = foreach B generate COUNT(A) as count;",
            "split A into Y if (2 * C.count) < a1, X if a1 == 5;",
            "Store Y into '" + output + "';"
    };
    for (String statement : script) {
        pigServer.registerQuery(statement);
    }
    pigServer.executeBatch();

    // Read the stored output back and check it contains only the last 2 rows.
    pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);");

    Iterator<Tuple> iter = pigServer.openIterator("Z");

    Tuple row = iter.next();
    assertTrue(row.toString().equals("(2,10.0)"));

    row = iter.next();
    assertTrue(row.toString().equals("(3,20.0)"));

    assertFalse(iter.hasNext());
    Util.deleteFile(cluster, output);

}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:39,代码来源:TestScalarAliases.java

示例12: registerQuery

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Feeds the given Pig Latin snippet through a non-interactive GruntParser
 * bound to the supplied PigServer, translating parse failures into IOException
 * with the original cause preserved.
 */
private void registerQuery(PigServer pigServer, String pl) throws IOException {
    GruntParser parser = new GruntParser(new StringReader(pl));
    parser.setInteractive(false);
    parser.setParams(pigServer);
    pigServer.setBatchOn();
    try {
        parser.parseStopOnError(true);
    } catch (ParseException e) {
        throw new IOException("Failed to parse query: " + pl, e);
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:12,代码来源:BoundScript.java

示例13: testMultiStorage

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Runs the given queries as one Pig batch in local or cluster mode and then
 * verifies the stored output.
 *
 * <p>Javadoc fixed: the previous version documented a non-existent
 * {@code pigServer} parameter and omitted {@code outPath}.
 *
 * @param mode    whether to use the local or the cluster PigServer
 * @param outPath output location checked by {@code verifyResults}
 * @param queries Pig Latin statements to register before executing the batch
 * @throws IOException if query registration or batch execution fails
 */
private void testMultiStorage( Mode mode, String outPath,
    String... queries) throws IOException {
  // Pick the server matching the requested execution mode.
  PigServer pigServer = (Mode.local == mode) ? this.pigServerLocal : this.pigServer;
  pigServer.setBatchOn();
  for (String query : queries) {
    pigServer.registerQuery(query);
  }
  pigServer.executeBatch();
  verifyResults(mode, outPath);
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:19,代码来源:TestMultiStorage.java

示例14: testPredicatePushdown

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Checks that ParquetLoader predicate pushdown limits the number of records
 * actually read: with a filter matching two of ten rows, only two records
 * should be reported by the input statistics.
 */
@Test
public void testPredicatePushdown() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);

  PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
  pigServer.setValidateEachStatement(true);

  String out = "target/out";
  String out2 = "target/out2";

  // Seed the mock storage with 10 rows of (int, double, long, chararray).
  int rows = 10;
  Data data = Storage.resetData(pigServer);
  List<Tuple> seeded = new ArrayList<Tuple>();
  for (int row = 0; row < rows; row++) {
    seeded.add(Storage.tuple(row, row * 1.0, row * 2L, "v" + row));
  }
  data.set("in", "c1:int, c2:double, c3:long, c4:chararray", seeded);

  // Write the rows out as Parquet.
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  pigServer.executeBatch();

  // Load them back with a filter matching exactly two rows (c1 == 1, c1 == 5).
  pigServer.deleteFile(out2);
  pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
  pigServer.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
  pigServer.registerQuery("STORE C into '" + out2 + "' using mock.Storage();");
  List<ExecJob> jobs = pigServer.executeBatch();

  // With pushdown enabled only the matching records should have been read.
  long recordsRead = jobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();
  assertEquals(2, recordsRead);
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:34,代码来源:TestParquetLoader.java

示例15: testUnionOnSchemaAdditionalColumnsWithImplicitSplit

import org.apache.pig.PigServer; //导入方法依赖的package包/类
/**
 * Union-onschema with relations of different widths, where batch mode gives
 * relation l3 two consumers and thereby triggers an implicit split.
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pig);

    // Use batch to force multiple outputs from relation l3. This causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    pig.setBatchOn();

    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);"
        + "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);" 
        + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);"
        + "u = union onschema l1, l2, l3;"
        + "store u into 'out1' using mock.Storage;"
        + "store l3 into 'out2' using mock.Storage;"
    ;

    Util.registerMultiLineQuery(pig, query);
    pig.executeBatch();

    List<Tuple> unionOutput = data.get("out1");
    List<Tuple> emptyOutput = data.get("out2");

    // Missing columns are padded with nulls up to the widest schema.
    List<Tuple> expected =
            Util.getTuplesFromConstantTupleStrings(
                    new String[] {
                            "(1,2,null,null)",
                            "(5,3,null,null)",
                            "(1,2,3,null)",
                            "(4,5,6,null)",
                    });

    Util.checkQueryOutputsAfterSort(unionOutput, expected);

    // l3 loads an empty file, so its stored output is empty.
    assertEquals(0, emptyOutput.size());
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:40,代码来源:TestUnionOnSchema.java


注:本文中的org.apache.pig.PigServer.setBatchOn方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。