

Java MROperPlan.getSuccessors Method Code Examples

This article collects typical usage examples of the Java method org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan.getSuccessors. If you are wondering what MROperPlan.getSuccessors does or how to call it, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan.


Nine code examples of MROperPlan.getSuccessors are shown below, sorted by popularity by default.
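Most of the examples share one traversal idiom: start at a root of the MROperPlan and follow getSuccessors until it returns null, counting the MapReduceOper nodes along the way to verify how many MR jobs the compiler produced. Below is a minimal sketch of that idiom as a standalone helper; the method name countMapReduceJobs is hypothetical, getRoots and getSuccessors are the plan methods exercised in the examples, and the sketch assumes a linear plan in which each operator has at most one successor, as in these tests.

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;

// Hypothetical helper: counts the MR jobs in a linear MROperPlan by walking
// the successor chain from the first root. getSuccessors returns null when
// the current operator has no successors, which terminates the walk.
static int countMapReduceJobs(MROperPlan mrPlan) {
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    int count = 1;
    while (mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }
    return count;
}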

Example 1: testNumReducersInLimit

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
/**
 * Test to ensure that an order-by without parallel followed by a limit (i.e., top-k)
 * always produces the correct number of map-reduce jobs. Because this unit test runs
 * locally, reduce parallelism is 1, so the extra MR job that applies the final limit
 * is NOT introduced.
 */
@Test
public void testNumReducersInLimit() throws Exception {
	String query = "a = load 'input';" +
	"b = order a by $0;" +
	"c = limit b 10;" +
	"store c into 'output';";

	PhysicalPlan pp = Util.buildPp(pigServer, query);
	MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
	MapReduceOper mrOper = mrPlan.getRoots().get(0);
	int count = 1;

	while(mrPlan.getSuccessors(mrOper) != null) {
		mrOper = mrPlan.getSuccessors(mrOper).get(0);
		++count;
	}
	assertEquals(3, count);
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 25, Source: TestMRCompiler.java

Example 2: testNumReducersInLimitWithParallel

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
/**
 * Test to ensure that an order-by with parallel followed by a limit (i.e., top-k)
 * always produces the correct number of map-reduce jobs.
 */
@Test
public void testNumReducersInLimitWithParallel() throws Exception {
	String query = "a = load 'input';" +
	"b = order a by $0 parallel 2;" +
	"c = limit b 10;" + "store c into 'output';";

	PhysicalPlan pp = Util.buildPp(pigServerMR, query);
	MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

	LimitAdjuster la = new LimitAdjuster(mrPlan, pc);
	la.visit();
	la.adjust();

	MapReduceOper mrOper = mrPlan.getRoots().get(0);
	int count = 1;

	while(mrPlan.getSuccessors(mrOper) != null) {
		mrOper = mrPlan.getSuccessors(mrOper).get(0);
		++count;
	}
	assertEquals(4, count);
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 27, Source: TestMRCompiler.java

Example 3: testLimitAdjusterFuncShipped

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
@Test
public void testLimitAdjusterFuncShipped() throws Exception{
    String query = "a = load 'input';" +
    "b = order a by $0 parallel 2;" +
    "c = limit b 7;" + "store c into 'output' using "
            + PigStorageNoDefCtor.class.getName() + "('\t');";

    PhysicalPlan pp = Util.buildPp(pigServerMR, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    LimitAdjuster la = new LimitAdjuster(mrPlan, pc);
    la.visit();
    la.adjust();

    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    int count = 1;

    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }
    assertEquals(4, count);

    MapReduceOper op = mrPlan.getLeaves().get(0);
    assertTrue(op.UDFs.contains(new FuncSpec(PigStorageNoDefCtor.class.getName())+"('\t')"));
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 27, Source: TestMRCompiler.java

Example 4: testPoissonSampleOptimizer

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
@Test
public void testPoissonSampleOptimizer() throws Exception {
    String query = " A = load 'input' using PigStorage('\t');" + 
    "B = load 'input' using PigStorage('\t');" + 
    " C = join A by $0, B by $0 using 'skewed';" +
    "store C into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    int count = 1;
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }        
    // Before optimizer visits, number of MR jobs = 3.
    assertEquals(3,count);

    SampleOptimizer so = new SampleOptimizer(mrPlan, pc);
    so.visit();

    count = 1;
    mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }        
    // After optimizer visits, number of MR jobs = 2
    assertEquals(2,count);
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 31, Source: TestSampleOptimizer.java

Example 5: testOrderByUDFSet

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
@Test
public void testOrderByUDFSet() throws Exception {
    String query = "a = load 'input1' using BinStorage();" + 
    "b = order a by $0;" + "store b into 'output';";
    
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    
    int count = 1;
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }        
    // Before optimizer visits, number of MR jobs = 3.
    assertEquals(3,count);

    SampleOptimizer so = new SampleOptimizer(mrPlan, pc);
    so.visit();

    count = 1;
    mrOper = mrPlan.getRoots().get(0);
    // the first mrOper should be the sampling job - its UDF list should only
    // contain BinStorage
    assertTrue(mrOper.UDFs.size()==1);
    assertTrue(mrOper.UDFs.contains("BinStorage"));
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        // the second mrOper is the real order-by job - its UDF list should
        // contain BinStorage (corresponding to the load) and PigStorage
        // (corresponding to the store)
        assertTrue(mrOper.UDFs.size()==2);
        assertTrue(mrOper.UDFs.contains("BinStorage"));
        assertTrue(mrOper.UDFs.contains("org.apache.pig.builtin.PigStorage"));
        ++count;
    }        
    // After optimizer visits, number of MR jobs = 2
    assertEquals(2,count);
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 40, Source: TestSampleOptimizer.java

Example 6: addWorkflowAdjacenciesToConf

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
public void addWorkflowAdjacenciesToConf(MROperPlan mrop, Configuration conf) {
    for (MapReduceOper source : mrop) {
        List<String> targets = new ArrayList<String>();
        if (mrop.getSuccessors(source) != null) {
            for (MapReduceOper target : mrop.getSuccessors(source)) {
                targets.add(target.getOperatorKey().toString());
            }
        }
        String[] s = new String[targets.size()];
        conf.setStrings("mapreduce.workflow.adjacency." + source.getOperatorKey().toString(), targets.toArray(s));
    }
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 13, Source: MRScriptState.java
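As a hedged usage sketch, the adjacency entries written above can be read back from the same Configuration under the mapreduce.workflow.adjacency. prefix; Configuration.getStrings is the standard counterpart of the setStrings call in the example. The loop below is illustrative, not part of MRScriptState.

// Illustrative read-back of the workflow adjacency written by
// addWorkflowAdjacenciesToConf: one key per source operator, whose value
// lists the operator keys of its successors.
for (MapReduceOper source : mrop) {
    String key = "mapreduce.workflow.adjacency." + source.getOperatorKey().toString();
    String[] targets = conf.getStrings(key);
    // targets may be null or empty for sink operators with no successors
}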

Example 7: adjustNumReducers

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
/**
 * Adjust the number of reducers based on the default parallelism, the requested
 * parallelism, and the estimated parallelism. For a sampler job, the next job is also
 * adjusted in advance so that its runtime parallelism can be used as the number of
 * partitions in the sampler.
 * @param plan the MR plan
 * @param mro the MR operator
 * @param nwJob the current job
 * @throws IOException
 */
public void adjustNumReducers(MROperPlan plan, MapReduceOper mro,
        org.apache.hadoop.mapreduce.Job nwJob) throws IOException {
    int jobParallelism = calculateRuntimeReducers(mro, nwJob);

    if (mro.isSampler() && plan.getSuccessors(mro) != null) {
        // We need to calculate the final number of reducers of the next job
        // (order-by or skew-join) in order to generate the quantile file.
        MapReduceOper nextMro = plan.getSuccessors(mro).get(0);

        // Here we use the same conf and Job to calculate the runtime #reducers of the
        // next job, which is fine since the statistics come from the nextMro's POLoads.
        int nPartitions = calculateRuntimeReducers(nextMro, nwJob);

        // set the runtime #reducers of the next job as the #partitions
        ParallelConstantVisitor visitor =
                new ParallelConstantVisitor(mro.reducePlan, nPartitions);
        visitor.visit();
    }
    log.info("Setting Parallelism to " + jobParallelism);

    Configuration conf = nwJob.getConfiguration();

    // set various parallelism into the job conf for later analysis, PIG-2779
    conf.setInt(PigImplConstants.REDUCER_DEFAULT_PARALLELISM, pigContext.defaultParallel);
    conf.setInt(PigImplConstants.REDUCER_REQUESTED_PARALLELISM, mro.requestedParallelism);
    conf.setInt(PigImplConstants.REDUCER_ESTIMATED_PARALLELISM, mro.estimatedParallelism);

    // this is for backward compatibility; using runtimeParallelism at runtime is encouraged
    mro.requestedParallelism = jobParallelism;

    // finally set the number of reducers
    conf.setInt(MRConfiguration.REDUCE_TASKS, jobParallelism);
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 43, Source: JobControlCompiler.java
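A brief usage sketch: after adjustNumReducers runs, the recorded parallelism values can be inspected from the job configuration using the same constants the method sets. The -1 defaults below are assumptions for illustration, not values from the source.

// Illustrative inspection of the parallelism settings recorded for PIG-2779.
Configuration conf = nwJob.getConfiguration();
int defaultParallel   = conf.getInt(PigImplConstants.REDUCER_DEFAULT_PARALLELISM, -1);
int requestedParallel = conf.getInt(PigImplConstants.REDUCER_REQUESTED_PARALLELISM, -1);
int estimatedParallel = conf.getInt(PigImplConstants.REDUCER_ESTIMATED_PARALLELISM, -1);
int numReducers       = conf.getInt(MRConfiguration.REDUCE_TASKS, -1);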

Example 8: testOptimizerFired

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
@Test
public void testOptimizerFired() throws Exception{
    String query = " A = load 'input' using PigStorage('\t');" +
    " B = order A by $0;" + "store B into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    int count = 1;
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }
    
    // Before optimizer visits, number of MR jobs = 3.
    assertEquals(3,count);   

    SampleOptimizer so = new SampleOptimizer(mrPlan, pc);
    so.visit();

    count = 1;
    mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }
    
    // After optimizer visits, number of MR jobs = 2.
    assertEquals(2,count);

    // Test if RandomSampleLoader got pushed to top.
    mrOper = mrPlan.getRoots().get(0);
    List<PhysicalOperator> phyOps = mrOper.mapPlan.getRoots();
    assertEquals(1, phyOps.size());
    assertTrue(phyOps.get(0) instanceof POLoad);
    assertTrue(((POLoad)phyOps.get(0)).getLFile().getFuncName().equals("org.apache.pig.impl.builtin.RandomSampleLoader"));

    // Test that RandomSampleLoader is no longer present in the second MR job.
    phyOps = mrPlan.getSuccessors(mrOper).get(0).mapPlan.getRoots();
    assertEquals(1, phyOps.size());
    assertTrue(phyOps.get(0) instanceof POLoad);
    assertFalse(((POLoad)phyOps.get(0)).getLFile().getFuncName().equals("org.apache.pig.impl.builtin.RandomSampleLoader"));
}
 
Developer: sigmoidanalytics, Project: spork-streaming, Lines: 44, Source: TestSampleOptimizer.java
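The loader checks in this example and the next repeat the same three assertions. A small hypothetical helper could fold them into one call; it uses only the calls already present above (mapPlan.getRoots, the POLoad instanceof check, and getLFile().getFuncName()).

// Hypothetical helper: does the given MR operator's map plan have exactly one
// root, and is that root a POLoad using the named load function?
static boolean rootLoadsWith(MapReduceOper mrOper, String funcName) {
    List<PhysicalOperator> roots = mrOper.mapPlan.getRoots();
    return roots.size() == 1
            && roots.get(0) instanceof POLoad
            && funcName.equals(((POLoad) roots.get(0)).getLFile().getFuncName());
}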

Example 9: testOptimizerNotFired

import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; // import the package/class this method depends on
@Test
public void testOptimizerNotFired() throws Exception{
    String query = " A = load 'input' using PigStorage('\t');" + "B = group A by $0;" +
    " C = order B by $0;" + "store C into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    int count = 1;
    MapReduceOper mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }        
    // Before optimizer visits, number of MR jobs = 3.
    assertEquals(3,count);

    SampleOptimizer so = new SampleOptimizer(mrPlan, pc);
    so.visit();

    count = 1;
    mrOper = mrPlan.getRoots().get(0);
    while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
    }        
    
    // After the optimizer visits, the number of MR jobs is still 3,
    // since the optimizer did not fire.
    assertEquals(3,count);

    // Test that the sampler was not moved and is still in the 2nd MR job.
    mrOper = mrPlan.getRoots().get(0);
    List<PhysicalOperator> phyOps = mrOper.mapPlan.getRoots();
    assertEquals(1, phyOps.size());
    assertTrue(phyOps.get(0) instanceof POLoad);
    assertFalse(((POLoad)phyOps.get(0)).getLFile().getFuncName().equals("org.apache.pig.impl.builtin.RandomSampleLoader"));

    phyOps = mrPlan.getSuccessors(mrOper).get(0).mapPlan.getRoots();
    assertEquals(1, phyOps.size());
    assertTrue(phyOps.get(0) instanceof POLoad);
    assertTrue(((POLoad)phyOps.get(0)).getLFile().getFuncName().equals("org.apache.pig.impl.builtin.RandomSampleLoader"));
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:43,代码来源:TestSampleOptimizer.java


Note: The org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan.getSuccessors method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Consult the corresponding project's License before distributing or using the code, and do not republish without permission.