本文整理汇总了Java中org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan类的典型用法代码示例。如果您正苦于以下问题:Java MROperPlan类的具体用法?Java MROperPlan怎么用?Java MROperPlan使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
MROperPlan类属于org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans包,在下文中一共展示了MROperPlan类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testUDFInMergedCoGroup
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testUDFInMergedCoGroup() throws Exception {
    // Merged cogroup over two custom loaders; every MR stage must register
    // the UDFs it depends on so the backend can ship them to the cluster.
    String script =
        "a = load 'input1' using " + TestCollectableLoadFunc.class.getName() + "();"
        + "b = load 'input2' using " + TestIndexableLoadFunc.class.getName() + "();"
        + "c = cogroup a by $0, b by $0 using 'merge';"
        + "store c into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // The root job reads input1, so it must carry the collectable loader.
    MapReduceOper op = plan.getRoots().get(0);
    assertTrue(op.UDFs.contains(TestCollectableLoadFunc.class.getName()));

    // The successor performs the merge cogroup and needs both loaders.
    op = plan.getSuccessors(op).get(0);
    assertTrue(op.UDFs.contains(TestCollectableLoadFunc.class.getName()));
    assertTrue(op.UDFs.contains(TestIndexableLoadFunc.class.getName()));
}
示例2: testDefaultParallelInSort
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testDefaultParallelInSort() throws Throwable {
    // default_parallel is considered only at runtime, so here we only test requested parallel
    // more thorough tests can be found in TestNumberOfReducers.java
    String query = "a = load 'input';" + "b = order a by $0 parallel 100;" + "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    // Walk every MR job in the plan; the global-sort job must carry the
    // requested parallelism of 100 from the PARALLEL clause.
    Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
    int counter = 0;
    boolean foundSortJob = false; // guard: without this, a plan with no sort job would pass vacuously
    while (iter.hasNext()) {
        MapReduceOper op = iter.next();
        counter++;
        if (op.isGlobalSort()) {
            foundSortJob = true;
            // assertEquals reports expected/actual on failure, unlike assertTrue(x==y)
            assertEquals(100, op.getRequestedParallelism());
        }
    }
    assertTrue("expected a global sort job in the MR plan", foundSortJob);
    assertEquals(3, counter);
    // Restore shared context state for subsequent tests.
    pc.defaultParallel = -1;
}
示例3: testNumReducersInLimitWithParallel
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
/**
 * Test to ensure that the order by with parallel followed by a limit, i.e., top k
 * always produces the correct number of map reduce jobs
 */
@Test
public void testNumReducersInLimitWithParallel() throws Exception {
    String script = "a = load 'input';"
        + "b = order a by $0 parallel 2;"
        + "c = limit b 10;"
        + "store c into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServerMR, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // The limit adjuster may append an extra MR job to apply the final limit
    // when the sort ran with parallelism > 1.
    LimitAdjuster adjuster = new LimitAdjuster(plan, pc);
    adjuster.visit();
    adjuster.adjust();

    // Measure the length of the job chain from the root to the last successor.
    int jobs = 1;
    for (MapReduceOper op = plan.getRoots().get(0);
            plan.getSuccessors(op) != null;
            op = plan.getSuccessors(op).get(0)) {
        ++jobs;
    }
    assertEquals(4, jobs);
}
示例4: testDefaultParallel
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testDefaultParallel() throws Throwable {
    // Setting defaultParallel on the context should surface as the compiled
    // job's reducer count when no PARALLEL clause overrides it.
    pc.defaultParallel = 100;

    String script = "a = load 'input';" + "b = group a by $0;" + "store b into 'output';";
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan physicalPlan = Util.buildPp(server, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Compile the MR plan down to an actual Hadoop job and inspect its conf.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler compiler = new JobControlCompiler(pc, conf);
    JobControl control = compiler.compile(plan, "Test");
    Job job = control.getWaitingJobs().get(0);

    int reducers = job.getJobConf().getNumReduceTasks();
    assertEquals(100, reducers);
    Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());

    // Reset the shared context for later tests.
    pc.defaultParallel = -1;
}
示例5: testDefaultParallelInSort
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testDefaultParallelInSort() throws Throwable {
    // default_parallel only matters at runtime; here we verify the *requested*
    // parallelism recorded on the sort job (see TestNumberOfReducers for more).
    String script = "a = load 'input';" + "b = order a by $0 parallel 100;" + "store b into 'output';";
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan physicalPlan = Util.buildPp(server, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Scan every job in the plan; the global-sort one must request 100 reducers.
    int jobCount = 0;
    for (MapReduceOper op : plan.getKeys().values()) {
        jobCount++;
        if (op.isGlobalSort()) {
            assertTrue(op.getRequestedParallelism() == 100);
        }
    }
    assertEquals(3, jobCount);

    // Reset the shared context for later tests.
    pc.defaultParallel = -1;
}
示例6: testDefaultParallelInSkewJoin
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testDefaultParallelInSkewJoin() throws Throwable {
    // default_parallel is considered only at runtime, so here we only test requested parallel
    // more thorough tests can be found in TestNumberOfReducers.java
    String query = "a = load 'input';" +
        "b = load 'input';" +
        "c = join a by $0, b by $0 using 'skewed' parallel 100;" +
        "store c into 'output';";
    PigServer ps = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    // Walk every MR job; the skewed-join job must carry the requested
    // parallelism of 100 from the PARALLEL clause.
    Iterator<MapReduceOper> iter = mrPlan.getKeys().values().iterator();
    int counter = 0;
    boolean foundSkewJoin = false; // guard: ensure the parallelism check actually ran
    while (iter.hasNext()) {
        MapReduceOper op = iter.next();
        counter++;
        if (op.isSkewedJoin()) {
            foundSkewJoin = true;
            // assertEquals reports expected/actual on failure, unlike assertTrue(x==y)
            assertEquals(100, op.getRequestedParallelism());
        }
    }
    assertTrue("expected a skewed join job in the MR plan", foundSkewJoin);
    assertEquals(3, counter);
    // Restore shared context state for subsequent tests.
    pc.defaultParallel = -1;
}
示例7: testNumReducersInLimit
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
/**
 * Test to ensure that the order by without parallel followed by a limit, i.e., top k
 * always produces the correct number of map reduce jobs. In the testcase below since
 * we are running the unit test locally, we will get reduce parallelism as 1. So we will
 * NOT introduce the extra MR job to do a final limit
 */
@Test
public void testNumReducersInLimit() throws Exception {
    String script = "a = load 'input';"
        + "b = order a by $0;"
        + "c = limit b 10;"
        + "store c into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Measure the length of the job chain from the root to the last successor.
    int jobs = 1;
    for (MapReduceOper op = plan.getRoots().get(0);
            plan.getSuccessors(op) != null;
            op = plan.getSuccessors(op).get(0)) {
        ++jobs;
    }
    assertEquals(3, jobs);
}
示例8: testDefaultParallelInSkewJoin
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testDefaultParallelInSkewJoin() throws Throwable {
    // default_parallel only matters at runtime; here we verify the *requested*
    // parallelism recorded on the skew-join job (see TestNumberOfReducers for more).
    String script = "a = load 'input';"
        + "b = load 'input';"
        + "c = join a by $0, b by $0 using 'skewed' parallel 100;"
        + "store c into 'output';";
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan physicalPlan = Util.buildPp(server, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Scan every job in the plan; the skewed-join one must request 100 reducers.
    int jobCount = 0;
    for (MapReduceOper op : plan.getKeys().values()) {
        jobCount++;
        if (op.isSkewedJoin()) {
            assertTrue(op.getRequestedParallelism() == 100);
        }
    }
    assertEquals(3, jobCount);

    // Reset the shared context for later tests.
    pc.defaultParallel = -1;
}
示例9: testSchemaInStoreForDistinctLimit
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testSchemaInStoreForDistinctLimit() throws Exception {
    // test if the POStore in the 2nd mr plan (that stores the actual output)
    // has a schema
    String query = "a = load 'input1' as (a : int,b :float ,c : int);" +
        "b = distinct a;" +
        "c = limit b 10;" +
        "store c into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    MapReduceOper secondMrOper = mrPlan.getLeaves().get(0);
    POStore store = (POStore) secondMrOper.reducePlan.getLeaves().get(0);
    // JUnit's assertEquals takes (message, expected, actual): the schema parsed
    // from the literal is the expectation; the store's schema is the observed
    // value. The original had the two arguments swapped, which produces a
    // misleading failure message.
    assertEquals(
        "compare load and store schema",
        Utils.getSchemaFromString("a : int,b :float ,c : int"),
        store.getSchema()
    );
}
示例10: testGroupConstWithParallel
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
/**
 * Test parallelism for group by constant
 * @throws Throwable
 */
@Test
public void testGroupConstWithParallel() throws Throwable {
    PigContext pc = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    pc.defaultParallel = 100;
    pc.connect();

    String query = "a = load 'input';\n" + "b = group a by 1;" + "store b into 'output';";
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    // Compile down to a Hadoop job so the reducer count can be inspected.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    // Grouping on a constant yields a single key, so one reducer is expected
    // despite defaultParallel=100. (Fixed "parallism" typo in the message.)
    assertEquals("parallelism", 1, parallel);
}
示例11: testGroupNonConstWithParallel
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
/**
 * Test parallelism for group by column
 * @throws Throwable
 */
@Test
public void testGroupNonConstWithParallel() throws Throwable {
    PigContext pc = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    pc.defaultParallel = 100;
    pc.connect();

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    String query = "a = load 'input';\n" + "b = group a by $0;" + "store b into 'output';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    // Compile down to a Hadoop job so the reducer count can be inspected.
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    // Grouping on a real column should honor defaultParallel=100.
    // (Fixed "parallism" typo in the message.)
    assertEquals("parallelism", 100, parallel);
}
示例12: testSortOptimization3
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testSortOptimization3() throws Exception {
    // Sort on the main key prefix / non main key prefix mixed
    String script = "A=LOAD 'input1' AS (a0, a1, a2);"
        + "B = group A by $0;"
        + "C = foreach B { D = limit A 10; E = order D by $1; F = order E by $0; generate group, F;};"
        + "store C into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Run the secondary-key optimizer and check its bookkeeping: one job
    // gained a secondary key, both inner sorts were removed, no distinct changed.
    SecondaryKeyOptimizer optimizer = new SecondaryKeyOptimizer(plan);
    optimizer.visit();
    assertEquals(1, optimizer.getNumMRUseSecondaryKey());
    assertEquals(2, optimizer.getNumSortRemoved());
    assertEquals(0, optimizer.getDistinctChanged());
}
示例13: testSortOptimization4
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testSortOptimization4() throws Exception {
    // Sort on the main key again
    String script = "A=LOAD 'input1' AS (a0, a1, a2);"
        + "B = group A by $0;"
        + "C = foreach B { D = limit A 10; E = order D by $0, $1, $2; generate group, E;};"
        + "store C into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Run the secondary-key optimizer and check its bookkeeping: one job
    // gained a secondary key, the inner sort was removed, no distinct changed.
    SecondaryKeyOptimizer optimizer = new SecondaryKeyOptimizer(plan);
    optimizer.visit();
    assertEquals(1, optimizer.getNumMRUseSecondaryKey());
    assertEquals(1, optimizer.getNumSortRemoved());
    assertEquals(0, optimizer.getDistinctChanged());
}
示例14: testSortOptimization5
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testSortOptimization5() throws Exception {
    // Sort on the two keys, we can only take off 1
    String script = "A=LOAD 'input1' AS (a0, a1, a2);"
        + "B = group A by $0;"
        + "C = foreach B { D = limit A 10; E = order D by $1; F = order E by $2; generate group, F;};"
        + "store C into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Run the secondary-key optimizer and check its bookkeeping: one job
    // gained a secondary key, only one of the two sorts was removable.
    SecondaryKeyOptimizer optimizer = new SecondaryKeyOptimizer(plan);
    optimizer.visit();
    assertEquals(1, optimizer.getNumMRUseSecondaryKey());
    assertEquals(1, optimizer.getNumSortRemoved());
    assertEquals(0, optimizer.getDistinctChanged());
}
示例15: testSortOptimization6
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; //导入依赖的package包/类
@Test
public void testSortOptimization6() throws Exception {
    // Sort desc
    String script = "A=LOAD 'input1' AS (a0, a1, a2);"
        + "B = group A by $0;"
        + "C = foreach B { D = order A by $0 desc; generate group, D;};"
        + "store C into 'output';";
    PhysicalPlan physicalPlan = Util.buildPp(pigServer, script);
    MROperPlan plan = Util.buildMRPlan(physicalPlan, pc);

    // Run the secondary-key optimizer and check its bookkeeping: a descending
    // inner sort is still eligible for secondary-key rewriting.
    SecondaryKeyOptimizer optimizer = new SecondaryKeyOptimizer(plan);
    optimizer.visit();
    assertEquals(1, optimizer.getNumMRUseSecondaryKey());
    assertEquals(1, optimizer.getNumSortRemoved());
    assertEquals(0, optimizer.getDistinctChanged());
}