本文整理汇总了Java中org.apache.pig.data.NonSpillableDataBag类的典型用法代码示例。如果您正苦于以下问题:Java NonSpillableDataBag类的具体用法?Java NonSpillableDataBag怎么用?Java NonSpillableDataBag使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
NonSpillableDataBag类属于org.apache.pig.data包,在下文中一共展示了NonSpillableDataBag类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Turns the values of the map stored in the first field of {@code input}
 * into a bag, wrapping each value in its own tuple.
 *
 * @param input tuple whose field 0 is cast to {@code Map<String, Object>}
 * @return a bag with one tuple per map value, or {@code null} when
 *         {@code input} is null, has no fields, or field 0 is null
 * @throws IOException declared by the overridden method
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    // Unchecked cast: the original author notes the Map type is verified at compile time.
    final Map<String, Object> valuesByKey = (Map<String, Object>) input.get(0);
    if (valuesByKey == null) {
        return null;
    }
    final Collection<Object> values = valuesByKey.values();
    // The bag is created with the number of values as its constructor argument.
    final DataBag result = new NonSpillableDataBag(values.size());
    for (Object value : values) {
        result.add(TUPLE_FACTORY.newTuple(value));
    }
    return result;
}
示例2: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Turns the keys of the map stored in the first field of {@code input}
 * into a bag, wrapping each key in its own tuple.
 *
 * @param input tuple whose field 0 is cast to {@code Map<String, Object>}
 * @return a bag with one tuple per map key, or {@code null} when
 *         {@code input} is null, has no fields, or field 0 is null
 * @throws IOException declared by the overridden method
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    final boolean nothingToRead = input == null || input.size() == 0;
    if (nothingToRead) {
        return null;
    }
    // Unchecked cast: the original author notes the Map type is verified at compile time.
    final Map<String, Object> source = (Map<String, Object>) input.get(0);
    if (source == null) {
        return null;
    }
    // The bag is created with the map's entry count as its constructor argument.
    final DataBag keys = new NonSpillableDataBag(source.size());
    for (String key : source.keySet()) {
        keys.add(TUPLE_FACTORY.newTuple(key));
    }
    return keys;
}
示例3: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Builds a bag out of the fields of {@code input}: a field that is already
 * a tuple is added as-is, any other field is wrapped in a one-field tuple.
 *
 * @param input tuple whose fields become the bag's contents
 * @return a bag holding one tuple per input field
 * @throws IOException declared by the overridden method
 * @throws RuntimeException wrapping any exception raised while building the bag
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        // The assumption is that a bag whose contents fit into an input
        // tuple will not need to be spilled.
        final DataBag result = new NonSpillableDataBag(input.size());
        for (int index = 0; index < input.size(); ++index) {
            final Object field = input.get(index);
            final Tuple element;
            if (field instanceof Tuple) {
                element = (Tuple) field;
            } else {
                // Wrap the non-tuple value in a fresh single-field tuple.
                element = TupleFactory.getInstance().newTuple(1);
                element.set(0, field);
            }
            result.add(element);
        }
        return result;
    } catch (Exception cause) {
        throw new RuntimeException("Error while creating a bag", cause);
    }
}
示例4: testNonSpillableDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Adds three two-field tuples to a {@code NonSpillableDataBag} and checks
 * that iterating the bag yields equal tuples in insertion order, and
 * exactly as many as were added.
 */
@Test
public void testNonSpillableDataBag() throws Exception {
    final String[][] tupleContents = { { "a", "b" }, { "c", "d" }, { "e", "f" } };
    final NonSpillableDataBag bag = new NonSpillableDataBag();
    for (String[] fields : tupleContents) {
        bag.add(Util.createTuple(fields));
    }

    int visited = 0;
    for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext(); visited++) {
        final Tuple actual = cursor.next();
        assertEquals(Util.createTuple(tupleContents[visited]), actual);
    }
    // Every added tuple must have been seen exactly once.
    assertEquals(tupleContents.length, visited);
}
示例5: tuple
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Builds a nested fixture tuple with {@code TOP_LEVEL_COLS} fields.
 *
 * <p>Each top-level field is itself a 10-field tuple laid out as:
 * <ul>
 *   <li>fields 0-3: strings of the form {@code "foo<i>,<j>"};</li>
 *   <li>fields 4-7: the long values 0 through 3;</li>
 *   <li>fields 8-9: bags, each holding ten single-field tuples with the
 *       integers 0 through 9.</li>
 * </ul>
 *
 * @return the populated top-level tuple
 * @throws ExecException declared because of the {@code Tuple.set} calls
 */
private static Tuple tuple() throws ExecException {
    TupleFactory tf = TupleFactory.getInstance();
    Tuple t = tf.newTuple(TOP_LEVEL_COLS);
    for (int i = 0; i < TOP_LEVEL_COLS; i++) {
        Tuple ti = tf.newTuple(10);
        // Fields 0-3: string columns.
        for (int j = 0; j < 4; j++) {
            ti.set(j, "foo" + i + "," + j);
        }
        // Fields 4-7: long columns.
        for (int k = 0; k < 4; k++) {
            ti.set(4 + k, (long) k);
        }
        // Fields 8-9: bag columns.
        for (int l = 0; l < 2; l++) {
            DataBag bag = new NonSpillableDataBag();
            for (int m = 0; m < 10; m++) {
                // Integer.valueOf replaces the deprecated Integer(int) constructor.
                // The (Object) cast keeps the call on the single-value newTuple
                // overload rather than the size-taking newTuple(int).
                bag.add(tf.newTuple((Object) Integer.valueOf(m)));
            }
            ti.set(8 + l, bag);
        }
        t.set(i, ti);
    }
    return t;
}
示例6: POFRJoin
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the operator: stores the join configuration, allocates the
 * per-input arrays (one slot per entry of {@code ppLists}), calls
 * {@code createJoinPlans}, and builds the bag that holds {@code nullTuple}.
 *
 * @param k operator key; passed to the superclass constructor and to createJoinPlans
 * @param rp passed through to the superclass constructor (presumably the requested parallelism - TODO confirm)
 * @param inp input operators; passed through to the superclass constructor
 * @param ppLists lists of physical plans; stored, and its size fixes the length of the arrays allocated here
 * @param keyTypes stored as-is
 * @param replFiles stored as-is
 * @param fragment stored as-is (presumably the index of the fragmented input - TODO confirm)
 * @param isLeftOuter stored in {@code isLeftOuterJoin}
 * @param nullTuple the single tuple placed into {@code nullBag}
 * @throws ExecException declared; NOTE(review): presumably raised by createJoinPlans - confirm
 */
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp,
List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes,
FileSpec[] replFiles, int fragment, boolean isLeftOuter,
Tuple nullTuple)
throws ExecException {
super(k, rp, inp);
phyPlanLists = ppLists;
this.fragment = fragment;
this.keyTypes = keyTypes;
this.replFiles = replFiles;
// The three arrays below all get one slot per plan list.
replicates = new Map[ppLists.size()];
LRs = new POLocalRearrange[ppLists.size()];
constExps = new ConstantExpression[ppLists.size()];
// NOTE(review): createJoinPlans presumably reads the fields assigned above,
// so keep this call after them - confirm against its definition.
createJoinPlans(k);
processingPlan = false;
mTupleFactory = TupleFactory.getInstance();
// nullBag is built from a list containing only nullTuple.
List<Tuple> tupList = new ArrayList<Tuple>();
tupList.add(nullTuple);
nullBag = new NonSpillableDataBag(tupList);
this.isLeftOuterJoin = isLeftOuter;
}
示例7: exec
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Collects the distinct values of the map stored in the first field of
 * {@code input} into a bag, one single-field tuple per distinct value, in
 * the order the map's value iterator first yields them.
 *
 * @param input tuple whose field 0 is cast to {@code Map<String, Object>}
 * @return a bag of the distinct map values, or {@code null} when
 *         {@code input} is null, has no fields, or field 0 is null
 * @throws IOException declared by the overridden method
 */
@SuppressWarnings("unchecked")
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    // Unchecked cast: the original author notes the Map type is verified at compile time.
    final Map<String, Object> source = (Map<String, Object>) input.get(0);
    if (source == null) {
        return null;
    }
    final Set<Object> seen = new HashSet<Object>(getInitialSetSize(source.values()));
    final DataBag distinct = new NonSpillableDataBag();
    for (Object value : source.values()) {
        // add() returns true only for a value that was not already in the set.
        if (seen.add(value)) {
            distinct.add(TUPLE_FACTORY.newTuple(value));
        }
    }
    return distinct;
}
示例8: getNextDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
@Override
/**
 * Fetches the next input bag and hands it to {@code consumeInputBag}.
 *
 * <p>When the input status is EOP and {@code sendEmptyBagOnEOP} is set, the
 * input is replaced by an empty bag with status OK before being consumed.
 * Any other non-OK input is returned unchanged. Except when an exception
 * propagates, {@code sendEmptyBagOnEOP} is cleared before returning.
 *
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared; propagated from the helpers called here
 */
public Result getNextDataBag() throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable: STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        final boolean replaceEopWithEmptyBag =
                input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!replaceEopWithEmptyBag) {
            // A result is being sent downstream, so no empty bag should be
            // sent on EOP any more unless new inputs arrive (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor signalled EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless the enclosing foreach supplies new inputs, in which
        // case reset() restores the flag.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result consumed = consumeInputBag(input);
    // A result (empty bag or otherwise) is being sent downstream, so no
    // empty bag should be sent on EOP any more unless new inputs arrive.
    sendEmptyBagOnEOP = false;
    return consumed;
}
示例9: createDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the bag implementation selected by the job configuration.
 *
 * <p>Reads the {@code pig.cachedbag.type} setting from
 * {@code PigMapReduce.sJobConfInternal} when a configuration is present.
 *
 * @param numBags passed to the {@code InternalCachedBag} constructor
 * @return a {@code NonSpillableDataBag} when the setting equals "default"
 *         (ignoring case), otherwise an {@code InternalCachedBag}
 */
private DataBag createDataBag(int numBags) {
    final String bagType = PigMapReduce.sJobConfInternal.get() == null
            ? null
            : PigMapReduce.sJobConfInternal.get().get("pig.cachedbag.type");
    // Constant-first comparison is false for a null bagType.
    if ("default".equalsIgnoreCase(bagType)) {
        return new NonSpillableDataBag();
    }
    return new InternalCachedBag(numBags);
}
示例10: POFRJoin
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the operator: stores the join configuration, allocates the
 * per-input arrays (one slot per entry of {@code ppLists}), calls
 * {@code createJoinPlans}, builds the bag that holds {@code nullTuple},
 * and stores the schema arrays, substituting new arrays when they are null.
 *
 * @param k operator key; passed to the superclass constructor and to createJoinPlans
 * @param rp passed through to the superclass constructor (presumably the requested parallelism - TODO confirm)
 * @param inp input operators; passed through to the superclass constructor
 * @param ppLists lists of physical plans; stored, and its size fixes the length of the arrays allocated here
 * @param keyTypes stored as-is
 * @param replFiles stored as-is; its length (0 when null) sizes the fallback schema arrays
 * @param fragment stored as-is (presumably the index of the fragmented input - TODO confirm)
 * @param isLeftOuter stored in {@code isLeftOuterJoin}
 * @param nullTuple the single tuple placed into {@code nullBag}
 * @param inputSchemas stored as-is when non-null; otherwise replaced by a new array
 * @param keySchemas stored as-is when non-null; otherwise replaced by a new array
 * @throws ExecException declared; NOTE(review): presumably raised by createJoinPlans - confirm
 */
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp,
List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes,
FileSpec[] replFiles, int fragment, boolean isLeftOuter,
Tuple nullTuple,
Schema[] inputSchemas,
Schema[] keySchemas)
throws ExecException {
super(k, rp, inp);
phyPlanLists = ppLists;
this.fragment = fragment;
this.keyTypes = keyTypes;
this.replFiles = replFiles;
// The three arrays below all get one slot per plan list.
replicates = new TupleToMapKey[ppLists.size()];
LRs = new POLocalRearrange[ppLists.size()];
constExps = new ConstantExpression[ppLists.size()];
// NOTE(review): createJoinPlans presumably reads the fields assigned above,
// so keep this call after them - confirm against its definition.
createJoinPlans(k);
processingPlan = false;
mTupleFactory = TupleFactory.getInstance();
// nullBag is built from a list containing only nullTuple.
List<Tuple> tupList = new ArrayList<Tuple>();
tupList.add(nullTuple);
nullBag = new NonSpillableDataBag(tupList);
this.isLeftOuterJoin = isLeftOuter;
// A null inputSchemas is replaced by an array with one (null) slot per
// replicated file, or an empty array when replFiles is null.
if (inputSchemas != null) {
this.inputSchemas = inputSchemas;
} else {
this.inputSchemas = new Schema[replFiles == null ? 0 : replFiles.length];
}
// Same fallback for keySchemas.
if (keySchemas != null) {
this.keySchemas = keySchemas;
} else {
this.keySchemas = new Schema[replFiles == null ? 0 : replFiles.length];
}
}
示例11: getNextDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
@Override
/**
 * Fetches the next input bag and hands it to {@code consumeInputBag}.
 *
 * <p>A NULL-status input is returned unchanged and leaves
 * {@code sendEmptyBagOnEOP} untouched. When the status is EOP and
 * {@code sendEmptyBagOnEOP} is set, the input is replaced by an empty bag
 * with status OK before being consumed. Any other non-OK input is returned
 * unchanged after the flag is cleared.
 *
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared; propagated from the helpers called here
 */
public Result getNextDataBag() throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable: STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        if (input.returnStatus == POStatus.STATUS_NULL) {
            return input;
        }
        final boolean replaceEopWithEmptyBag =
                input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!replaceEopWithEmptyBag) {
            // A result is being sent downstream, so no empty bag should be
            // sent on EOP any more unless new inputs arrive (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor signalled EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless the enclosing foreach supplies new inputs, in which
        // case reset() restores the flag.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result consumed = consumeInputBag(input);
    // A result (empty bag or otherwise) is being sent downstream, so no
    // empty bag should be sent on EOP any more unless new inputs arrive.
    sendEmptyBagOnEOP = false;
    return consumed;
}
示例12: createDataBag
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Creates the bag implementation selected by the job configuration.
 *
 * <p>On the first call (while {@code initialized} is false) the
 * {@code PigConfiguration.PIG_CACHEDBAG_TYPE} setting is read from
 * {@code PigMapReduce.sJobConfInternal}, when a configuration is present,
 * and {@code useDefaultBag} is set if it equals "default" ignoring case.
 * Later calls reuse the stored flag.
 *
 * @param numBags passed to the {@code InternalCachedBag} constructor
 * @return a {@code NonSpillableDataBag} when {@code useDefaultBag} is set,
 *         otherwise an {@code InternalCachedBag}
 */
private DataBag createDataBag(int numBags) {
    if (!initialized) {
        initialized = true;
        // Constant-first comparison is false when the setting is absent.
        if (PigMapReduce.sJobConfInternal.get() != null
                && "default".equalsIgnoreCase(
                        PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_TYPE))) {
            useDefaultBag = true;
        }
    }
    if (useDefaultBag) {
        return new NonSpillableDataBag();
    }
    return new InternalCachedBag(numBags);
}
示例13: generateRandomSortedSamples
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Generates {@code numSamples} single-field tuples holding pseudo-random
 * integers in {@code [0, max)}, sorts them, and wraps them in a bag.
 *
 * <p>The generator is seeded with the fixed value 1000, so the sequence is
 * reproducible across calls.
 *
 * @param numSamples number of tuples to generate
 * @param max exclusive upper bound passed to {@code Random.nextInt}
 * @return a bag built from the sorted list of samples
 * @throws Exception declared; covers failures of the tuple operations
 */
private DataBag generateRandomSortedSamples(int numSamples, int max) throws Exception {
    final Random generator = new Random(1000);
    final List<Tuple> sorted = new ArrayList<Tuple>();
    int remaining = numSamples;
    while (remaining-- > 0) {
        final Tuple sample = tFact.newTuple(1);
        sample.set(0, generator.nextInt(max));
        sorted.add(sample);
    }
    Collections.sort(sorted);
    return new NonSpillableDataBag(sorted);
}
示例14: getNext
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Fetches the next input bag and hands it to {@code consumeInputBag}.
 *
 * <p>When the input status is EOP and {@code sendEmptyBagOnEOP} is set, the
 * input is replaced by an empty bag with status OK before being consumed.
 * Any other non-OK input is returned unchanged. Except when an exception
 * propagates, {@code sendEmptyBagOnEOP} is cleared before returning.
 *
 * @param db not read by this method
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared; propagated from the helpers called here
 */
@Override
public Result getNext(DataBag db) throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable: STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != POStatus.STATUS_OK) {
        if (input.returnStatus != POStatus.STATUS_EOP || !sendEmptyBagOnEOP) {
            // A result is being sent downstream, so no empty bag should be
            // sent on EOP any more unless new inputs arrive (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor signalled EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless the enclosing foreach supplies new inputs, in which
        // case reset() restores the flag.
        input.result = new NonSpillableDataBag();
        input.returnStatus = POStatus.STATUS_OK;
    }
    final Result consumed = consumeInputBag(input);
    // A result (empty bag or otherwise) is being sent downstream, so no
    // empty bag should be sent on EOP any more unless new inputs arrive.
    sendEmptyBagOnEOP = false;
    return consumed;
}
示例15: getNext
import org.apache.pig.data.NonSpillableDataBag; //导入依赖的package包/类
/**
 * Fetches the next input bag and hands it to {@code consumeInputBag}.
 *
 * <p>When the input status is EOP and {@code sendEmptyBagOnEOP} is set, the
 * input is replaced by an empty bag with status OK before being consumed.
 * Any other non-OK input is returned unchanged. Except when an exception
 * propagates, {@code sendEmptyBagOnEOP} is cleared before returning.
 *
 * @param db not read by this method
 * @return the consumed result, or the unmodified non-OK input
 * @throws ExecException declared; propagated from the helpers called here
 */
@Override
public Result getNext(DataBag db) throws ExecException {
    final Result input = processInputBag();
    // During accumulation an empty bag is acceptable: STATUS_OK still has
    // to be sent so that the UDF can be called.
    if (isAccumulative()) {
        reset();
    }
    if (input.returnStatus != SOStatus.STATUS_OK) {
        final boolean replaceEopWithEmptyBag =
                input.returnStatus == SOStatus.STATUS_EOP && sendEmptyBagOnEOP;
        if (!replaceEopWithEmptyBag) {
            // A result is being sent downstream, so no empty bag should be
            // sent on EOP any more unless new inputs arrive (see reset()).
            sendEmptyBagOnEOP = false;
            return input;
        }
        // The predecessor signalled EOP but the successor in the pipeline
        // expects a bag, so hand it an empty one. EOP is sent on the next
        // call unless the enclosing foreach supplies new inputs, in which
        // case reset() restores the flag.
        input.result = new NonSpillableDataBag();
        input.returnStatus = SOStatus.STATUS_OK;
    }
    final Result consumed = consumeInputBag(input);
    // A result (empty bag or otherwise) is being sent downstream, so no
    // empty bag should be sent on EOP any more unless new inputs arrive.
    sendEmptyBagOnEOP = false;
    return consumed;
}