本文整理汇总了Java中org.apache.pig.data.DataBag.size方法的典型用法代码示例。如果您正苦于以下问题：Java DataBag.size方法的具体用法？Java DataBag.size怎么用？Java DataBag.size使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.data.DataBag的用法示例。
在下文中一共展示了DataBag.size方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Removes duplicates from the bag found in field 0 of the input. Tuples are grouped by the
 * identifier stored in tuple[0], and for each identifier the tuple with the highest confidence
 * level (stored in tuple[confidenceLevelPosition]) is retained.
 *
 * @param tuple input whose field 0 is the {@link DataBag} of tuples to be deduplicated
 * @return {@code null} for a null or empty input tuple; the input bag itself (possibly
 *         {@code null}) when it holds at most one element; otherwise a new default bag built
 *         from the retained tuples
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
  if (tuple == null || tuple.size() == 0) {
    return null;
  }
  DataBag candidates = (DataBag) tuple.get(0);
  // nothing to deduplicate unless there is more than one element
  if (candidates == null || candidates.size() <= 1) {
    return candidates;
  }
  // TreeMap keeps the retained tuples ordered by identifier
  Map<String, Tuple> bestByIdentifier = new TreeMap<String, Tuple>();
  for (Tuple candidate : candidates) {
    updateStoredTupleWhenConfidenceLevelHigher(candidate, bestByIdentifier);
  }
  ArrayList<Tuple> retained = new ArrayList<Tuple>(bestByIdentifier.values());
  return BagFactory.getInstance().newDefaultBag(retained);
}
示例2: accumulate
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: passes the bag in field 0 of the input tuple to
 * {@code updateUnion} together with this instance's union, creating the union on first use.
 * Input that is null, does not have exactly one field, or carries a null or empty bag is
 * ignored.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // logged once so the job log shows which execution path Pig selected
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  if ((inputTuple != null) && (inputTuple.size() == 1)) {
    final DataBag bag = (DataBag) inputTuple.get(0);
    if ((bag != null) && (bag.size() != 0)) {
      if (union_ == null) {
        // the union is created lazily, on the first non-empty bag
        union_ = new Union<S>(sketchSize_, summaryFactory_);
      }
      updateUnion(bag, union_);
    }
  }
}
示例3: accumulate
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: passes the bag in field 0 of the input tuple to
 * {@code updateUnion} together with the accumulating union, which is built on first use from
 * the configured nominal entries and number of values. Input that is null, does not have
 * exactly one field, or carries a null or empty bag is ignored.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // one-time log entry showing which execution path Pig selected
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  final boolean hasSingleField = (inputTuple != null) && (inputTuple.size() == 1);
  if (!hasSingleField) {
    return;
  }
  final DataBag bag = (DataBag) inputTuple.get(0);
  final boolean nothingToAdd = (bag == null) || (bag.size() == 0);
  if (nothingToAdd) {
    return;
  }
  if (accumUnion_ == null) {
    // built lazily, on the first non-empty bag
    final ArrayOfDoublesSetOperationBuilder builder = new ArrayOfDoublesSetOperationBuilder();
    accumUnion_ =
        builder.setNominalEntries(sketchSize_).setNumberOfValues(numValues_).buildUnion();
  }
  updateUnion(bag, accumUnion_);
}
示例4: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Returns the bag held in field 0 of a single-field tuple, or {@code null} when there is
 * nothing usable.
 *
 * @param tuple input expected to hold exactly one field, a {@link DataBag}
 * @return the bag from field 0 when it is non-null and non-empty; {@code null} when the tuple
 *         is null, does not have exactly one field, or the bag is null or empty
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
  if (tuple != null && tuple.size() == 1) {
    DataBag bag = (DataBag) tuple.get(0);
    if (bag != null && bag.size() != 0) {
      return bag;
    }
  }
  return null;
}
示例5: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one union and returns
 * the result as a compact byte array.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: a non-empty {@link DataBag}
 * is passed to {@code updateUnion}; a {@link DataByteArray} is wrapped as an
 * {@code HllSketch} and merged; null fields and empty bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return the union result, or the empty sketch when the input tuple is null or empty or the
 *         outer bag is null
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override
public DataByteArray exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("Algebraic was used");
    isFirstCall_ = false;
  }
  if (inputTuple == null || inputTuple.size() == 0) {
    return getEmptySketch();
  }
  final Union union = new Union(lgK_);
  final DataBag outerBag = (DataBag) inputTuple.get(0);
  if (outerBag == null) {
    return getEmptySketch();
  }
  for (final Tuple dataTuple : outerBag) {
    final Object field0 = dataTuple.get(0); // inputTuple.bag0.dataTupleN.f0
    if (field0 instanceof DataBag) {
      // A bag in field 0 comes from system-bagged outputs of multiple mapper Initial
      // functions (the Intermediate stage was bypassed); all of its tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, union);
      }
    } else if (field0 instanceof DataByteArray) {
      // A byte array in field 0 is assumed to be a sketch from system-bagged outputs of
      // multiple mapper Intermediate functions; it is merged into the union.
      union.update(HllSketch.wrap(Memory.wrap(((DataByteArray) field0).get())));
    } else if (field0 != null) { // we should never get here; a null field is simply skipped
      throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
          + field0.getClass().getName());
    }
  }
  return new DataByteArray(union.getResult(tgtHllType_).toCompactByteArray());
}
示例6: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one union and returns
 * the result, serialized as a compact byte array, wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: a non-empty {@link DataBag}
 * is passed to {@code updateUnion}; a {@link DataByteArray} is wrapped as an
 * {@code HllSketch} and merged; null fields and empty bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return a tuple holding the union result, or the empty-sketch tuple when the input tuple is
 *         null or empty or the outer bag is null
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("Algebraic was used");
    isFirstCall_ = false;
  }
  if (inputTuple == null || inputTuple.size() == 0) {
    return getEmptySketchTuple();
  }
  final DataBag outerBag = (DataBag) inputTuple.get(0);
  if (outerBag == null) {
    return getEmptySketchTuple();
  }
  final Union union = new Union(lgK_);
  for (final Tuple dataTuple : outerBag) {
    final Object field0 = dataTuple.get(0); // inputTuple.bag0.dataTupleN.f0
    if (field0 == null) {
      continue;
    }
    if (field0 instanceof DataBag) {
      // A bag in field 0 comes from system-bagged outputs of multiple mapper Initial
      // functions (the Intermediate stage was bypassed); all of its tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, union);
      }
      continue;
    }
    if (!(field0 instanceof DataByteArray)) { // we should never get here
      throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
          + field0.getClass().getName());
    }
    // A byte array in field 0 is assumed to be a sketch from system-bagged outputs of
    // multiple mapper Intermediate functions; it is merged into the union.
    final byte[] sketchBytes = ((DataByteArray) field0).get();
    union.update(HllSketch.wrap(Memory.wrap(sketchBytes)));
  }
  final byte[] resultBytes = union.getResult(tgtHllType_).toCompactByteArray();
  return tupleFactory_.newTuple(new DataByteArray(resultBytes));
}
示例7: accumulate
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: deserializes each sketch tuple in the input bag and merges it into
 * the running sketch held by this instance, creating that sketch on first use.
 *
 * <p>The call is a no-op when the input tuple is null, does not have exactly one field, when
 * field 0 is not a {@link DataBag} (which includes null), or when the bag is empty. Inner
 * tuples that do not have exactly one field, or whose field 0 is null, are skipped.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // this is to see in the log which way was used by Pig
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  if ((inputTuple == null) || (inputTuple.size() != 1)) {
    return;
  }
  final Object obj = inputTuple.get(0);
  if (!(obj instanceof DataBag)) {
    return; // instanceof is false for null, so a null field is rejected here too
  }
  // reuse the object that was just type-checked instead of reading field 0 a second time
  final DataBag bag = (DataBag) obj;
  if (bag.size() == 0) {
    return;
  }
  if (sketch_ == null) {
    sketch_ = new ItemsSketch<T>(sketchSize_);
  }
  for (final Tuple innerTuple : bag) {
    if ((innerTuple.size() != 1) || (innerTuple.get(0) == null)) {
      continue;
    }
    final ItemsSketch<T> incomingSketch = Util.deserializeSketchFromTuple(innerTuple, serDe_);
    sketch_.merge(incomingSketch);
  }
}
示例8: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one union and returns
 * the serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: a non-empty {@link DataBag}
 * is passed to {@code updateUnion}; a {@link DataByteArray} is wrapped as {@code Memory} and
 * merged; null fields and empty bags are skipped.
 *
 * <p>A null outer bag is treated like missing input and yields an empty sketch instead of
 * failing with a {@link NullPointerException}.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return a tuple holding the serialized union result, or a serialized empty sketch when there
 *         is no input or the union produced no result
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (inputTuple != null && inputTuple.size() > 0) {
    final DataBag outerBag = (DataBag) inputTuple.get(0);
    if (outerBag != null) { // a null bag falls through to the empty-sketch return
      final DoublesUnion union = unionBuilder_.build();
      for (final Tuple dataTuple : outerBag) {
        final Object f0 = dataTuple.get(0);
        if (f0 == null) { continue; }
        if (f0 instanceof DataBag) {
          final DataBag innerBag = (DataBag) f0; //inputTuple.bag0.dataTupleN.f0:bag
          if (innerBag.size() == 0) { continue; }
          // If field 0 of a dataTuple is again a Bag all tuples of this inner bag
          // will be passed into the union.
          // It is due to system bagged outputs from multiple mapper Initial functions.
          // The Intermediate stage was bypassed.
          updateUnion(innerBag, union);
        } else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
          // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch from a
          // prior call. It is due to system bagged outputs from multiple mapper Intermediate
          // functions. Each dataTuple.DBA:sketch will be merged into the union.
          final DataByteArray dba = (DataByteArray) f0;
          union.update(Memory.wrap(dba.get()));
        } else {
          throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
              + f0.getClass().getName());
        }
      }
      final DoublesSketch resultSketch = union.getResultAndReset();
      if (resultSketch != null) {
        return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(true)));
      }
    }
  }
  // return empty sketch
  return tupleFactory_.newTuple(new DataByteArray(unionBuilder_.build().getResult().toByteArray(true)));
}
示例9: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one union and returns
 * the serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: for a non-empty
 * {@link DataBag}, field 0 of every inner tuple is cast to {@code Double} and fed to the
 * union; a {@link DataByteArray} is wrapped as {@code Memory} and merged; null fields and
 * empty bags are skipped.
 *
 * <p>A null outer bag is treated like missing input and yields an empty sketch instead of
 * failing with a {@link NullPointerException}.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return a tuple holding the serialized union result, or a serialized empty sketch when there
 *         is no input or the union produced no result
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  if (inputTuple != null && inputTuple.size() > 0) {
    final DataBag outerBag = (DataBag) inputTuple.get(0);
    if (outerBag != null) { // a null bag falls through to the empty-sketch return
      final DoublesUnion union = unionBuilder_.build();
      for (final Tuple dataTuple : outerBag) {
        final Object f0 = dataTuple.get(0);
        if (f0 == null) { continue; }
        if (f0 instanceof DataBag) {
          final DataBag innerBag = (DataBag) f0; // inputTuple.bag0.dataTupleN.f0:bag
          if (innerBag.size() == 0) { continue; }
          // If field 0 of a dataTuple is a Bag all innerTuples of this inner bag
          // will be passed into the union.
          // It is due to system bagged outputs from multiple mapper Initial functions.
          // The Intermediate stage was bypassed.
          for (final Tuple innerTuple : innerBag) {
            union.update((Double) innerTuple.get(0));
          }
        } else if (f0 instanceof DataByteArray) { // inputTuple.bag0.dataTupleN.f0:DBA
          // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch
          // due to system bagged outputs from multiple mapper Intermediate functions.
          // Each dataTuple.DBA:sketch will be merged into the union.
          final DataByteArray dba = (DataByteArray) f0;
          union.update(Memory.wrap(dba.get()));
        } else {
          throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
              + f0.getClass().getName());
        }
      }
      final DoublesSketch resultSketch = union.getResultAndReset();
      if (resultSketch != null) {
        return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(true)));
      }
    }
  }
  // return empty sketch
  return tupleFactory_.newTuple(new DataByteArray(unionBuilder_.build().getResult().toByteArray(true)));
}
示例10: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one items union and
 * returns the serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: a non-empty {@link DataBag}
 * is passed to {@code updateUnion}; a {@link DataByteArray} is read as an {@code ItemsSketch}
 * using this instance's comparator and serDe and merged; null fields and empty bags are
 * skipped. The union and the empty sketch are created with {@code k_} when it is positive,
 * otherwise through the factory overload that takes only the comparator.
 *
 * <p>A null outer bag is treated like missing input and yields an empty sketch instead of
 * failing with a {@link NullPointerException}.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return a tuple holding the serialized union result, or a serialized empty sketch when there
 *         is no input or the union produced no result
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (inputTuple != null && inputTuple.size() > 0) {
    final DataBag outerBag = (DataBag) inputTuple.get(0);
    if (outerBag != null) { // a null bag falls through to the empty-sketch return
      final ItemsUnion<T> union = k_ > 0
          ? ItemsUnion.getInstance(k_, comparator_)
          : ItemsUnion.getInstance(comparator_);
      for (final Tuple dataTuple : outerBag) {
        final Object f0 = dataTuple.get(0);
        if (f0 == null) { continue; }
        if (f0 instanceof DataBag) {
          final DataBag innerBag = (DataBag) f0; //inputTuple.bag0.dataTupleN.f0:bag
          if (innerBag.size() == 0) { continue; }
          // If field 0 of a dataTuple is again a Bag all tuples of this inner bag
          // will be passed into the union.
          // It is due to system bagged outputs from multiple mapper Initial functions.
          // The Intermediate stage was bypassed.
          updateUnion(innerBag, union, comparator_, serDe_);
        } else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
          // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch from a
          // prior call. It is due to system bagged outputs from multiple mapper Intermediate
          // functions. Each dataTuple.DBA:sketch will be merged into the union.
          final DataByteArray dba = (DataByteArray) f0;
          union.update(ItemsSketch.getInstance(Memory.wrap(dba.get()), comparator_, serDe_));
        } else {
          throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
              + f0.getClass().getName());
        }
      }
      final ItemsSketch<T> resultSketch = union.getResultAndReset();
      if (resultSketch != null) {
        return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(serDe_)));
      }
    }
  }
  // return empty sketch
  final ItemsSketch<T> sketch = k_ > 0
      ? ItemsSketch.getInstance(k_, comparator_)
      : ItemsSketch.getInstance(comparator_);
  return tupleFactory_.newTuple(new DataByteArray(sketch.toByteArray(serDe_)));
}
示例11: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Short-circuits the parent implementation when the whole input bag fits within
 * {@code targetK_}.
 *
 * @param inputTuple tuple whose field 0 is the bag of samples
 * @return {@code null} when the tuple is null, has no fields, or field 0 is null; the tuple
 *         built by {@code createResultTuple} when the bag size does not exceed
 *         {@code targetK_}; otherwise whatever {@code super.exec} returns
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  final boolean hasSamples =
      inputTuple != null && inputTuple.size() >= 1 && !inputTuple.isNull(0);
  if (!hasSamples) {
    return null;
  }
  final DataBag samples = (DataBag) inputTuple.get(0);
  final long sampleCount = samples.size();
  // the entire input fits, so it is returned as-is without delegating to the parent
  return (sampleCount <= targetK_)
      ? createResultTuple(sampleCount, targetK_, samples)
      : super.exec(inputTuple);
}
示例12: dataBagToArrayList
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Copies the tuples of a bag, in the bag's iteration order, into a new list.
 *
 * @param bag the bag to copy; must not be null
 * @return a new list holding the same tuple references as the bag
 */
static ArrayList<Tuple> dataBagToArrayList(final DataBag bag) {
  // presize from the bag's reported size, narrowed from long to int
  final ArrayList<Tuple> tuples = new ArrayList<>((int) bag.size());
  for (final Tuple tuple : bag) {
    tuples.add(tuple);
  }
  return tuples;
}
示例13: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Builds a bag of composed tokens from two parallel bags: tokens and their named-entity tags.
 *
 * <p>Field 0 of the input is a tuple whose field 0 is the bag of tokens and whose field 1 is
 * the bag of tags; field 0 of each inner tuple is read as a {@code String}. Walking the tags
 * in order, every token tagged {@code "I-ORG"} or {@code "ORG"} is appended to the token being
 * composed. A tag of {@code "O"}, or reaching the last tag, emits the composed token (trimmed)
 * if anything was collected and starts a new one. Any other tag neither appends nor emits.
 *
 * @param input tuple whose field 0 is the (tokens bag, tags bag) tuple
 * @return {@code null} for a null or empty input; otherwise a bag with one single-field tuple
 *         per composed token, in order of appearance
 */
@Override
public DataBag exec(Tuple input) throws IOException {
  if (input == null || input.size() == 0) {
    return null;
  }
  DataBag namedEntities = bagFactory.newDefaultBag();
  Tuple t1 = (Tuple) input.get(0);
  DataBag tokensBag = (DataBag) t1.get(0);
  DataBag neBag = (DataBag) t1.get(1);

  // Copy both bags into arrays so tokens and tags can be addressed by the same index.
  String[] tokens = new String[(int) tokensBag.size()];
  int ti = 0;
  for (Tuple token : tokensBag) {
    tokens[ti++] = (String) token.get(0);
  }
  String[] ne = new String[(int) neBag.size()];
  int ni = 0;
  for (Tuple neTag : neBag) {
    ne[ni++] = (String) neTag.get(0);
  }

  StringBuilder composedToken = new StringBuilder();
  for (int i = 0; i < ne.length; i++) {
    if (ne[i].equals("I-ORG") || ne[i].equals("ORG")) {
      composedToken.append(tokens[i]).append(' ');
    }
    if (ne[i].equals("O") || i == ne.length - 1) {
      // test the content, not the reference: the original compared against "" with !=
      if (composedToken.length() > 0) {
        namedEntities.add(tupleFactory.newTuple(composedToken.toString().trim()));
      }
      composedToken.setLength(0);
    }
  }
  return namedEntities;
}
示例14: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag (field 0 of the input) into one items union and
 * returns the serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: for a non-empty
 * {@link DataBag}, field 0 of every inner tuple is converted with {@code extractValue} and fed
 * to the union; a {@link DataByteArray} is read as an {@code ItemsSketch} using this
 * instance's comparator and serDe and merged; null fields and empty bags are skipped. The
 * union and the empty sketch are created with {@code k_} when it is positive, otherwise
 * through the factory overload that takes only the comparator.
 *
 * <p>A null outer bag is treated like missing input and yields an empty sketch instead of
 * failing with a {@link NullPointerException}.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return a tuple holding the serialized union result, or a serialized empty sketch when there
 *         is no input or the union produced no result
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  if (inputTuple != null && inputTuple.size() > 0) {
    final DataBag outerBag = (DataBag) inputTuple.get(0);
    if (outerBag != null) { // a null bag falls through to the empty-sketch return
      final ItemsUnion<T> union = k_ > 0
          ? ItemsUnion.getInstance(k_, comparator_)
          : ItemsUnion.getInstance(comparator_);
      for (final Tuple dataTuple : outerBag) {
        final Object f0 = dataTuple.get(0);
        if (f0 == null) { continue; }
        if (f0 instanceof DataBag) {
          final DataBag innerBag = (DataBag) f0; // inputTuple.bag0.dataTupleN.f0:bag
          if (innerBag.size() == 0) { continue; }
          // If field 0 of a dataTuple is a Bag all innerTuples of this inner bag
          // will be passed into the union.
          // It is due to system bagged outputs from multiple mapper Initial functions.
          // The Intermediate stage was bypassed.
          for (final Tuple innerTuple : innerBag) {
            union.update(extractValue(innerTuple.get(0)));
          }
        } else if (f0 instanceof DataByteArray) { // inputTuple.bag0.dataTupleN.f0:DBA
          // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch
          // due to system bagged outputs from multiple mapper Intermediate functions.
          // Each dataTuple.DBA:sketch will be merged into the union.
          final DataByteArray dba = (DataByteArray) f0;
          union.update(ItemsSketch.getInstance(Memory.wrap(dba.get()), comparator_, serDe_));
        } else {
          throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
              + f0.getClass().getName());
        }
      }
      final ItemsSketch<T> resultSketch = union.getResultAndReset();
      if (resultSketch != null) {
        return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(serDe_)));
      }
    }
  }
  // return empty sketch
  final ItemsSketch<T> sketch = k_ > 0
      ? ItemsSketch.getInstance(k_, comparator_)
      : ItemsSketch.getInstance(comparator_);
  return tupleFactory_.newTuple(new DataByteArray(sketch.toByteArray(serDe_)));
}
示例15: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag of the input into one theta union and returns the
 * result converted by {@code compactOrderedSketchToTuple}.
 *
 * <p>Field 0 of each tuple in the outer bag is handled by type: a non-empty {@link DataBag}
 * is passed to {@code updateUnion}; a {@link DataByteArray} is wrapped as {@code Memory} and
 * merged; null fields and empty bags are skipped.
 *
 * @param inputTuple tuple from which {@code extractBag} obtains the outer bag
 * @return the tuple for the union result, or the pre-built empty-sketch tuple when
 *         {@code extractBag} returns null
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override //IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  final com.yahoo.sketches.theta.Union union =
      SetOperation.builder().setP(myP_).setSeed(mySeed_).setResizeFactor(RF)
          .setNominalEntries(myNomEntries_).buildUnion();
  final DataBag outerBag = extractBag(inputTuple); //InputTuple.bag0
  if (outerBag == null) {
    return myEmptyCompactOrderedSketchTuple_;
  }
  for (final Tuple dataTuple : outerBag) {
    final Object field0 = extractFieldAtIndex(dataTuple, 0); //inputTuple.bag0.dataTupleN.f0
    if (field0 instanceof DataBag) {
      // A bag in field 0 comes from system-bagged outputs of multiple mapper Initial
      // functions (the Intermediate stage was bypassed); all of its tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, union);
      }
    } else if (field0 instanceof DataByteArray) {
      // A byte array in field 0 is assumed to be a sketch from a prior call, due to
      // system-bagged outputs of multiple mapper Intermediate functions; it is merged in.
      final DataByteArray dba = (DataByteArray) field0;
      final Memory srcMem = Memory.wrap(dba.get());
      union.update(srcMem);
    } else if (field0 != null) { // we should never get here; a null field is simply skipped
      throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
          + field0.getClass().getName());
    }
  }
  final CompactSketch compactSketch = union.getResult(true, null);
  return compactOrderedSketchToTuple(compactSketch);
}