当前位置: 首页>>代码示例>>Java>>正文


Java DataBag.size方法代码示例

本文整理汇总了Java中org.apache.pig.data.DataBag.size方法的典型用法代码示例。如果您正苦于以下问题:Java DataBag.size方法的具体用法?Java DataBag.size怎么用?Java DataBag.size使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.pig.data.DataBag的用法示例。


在下文中一共展示了DataBag.size方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Collapses a bag of tuples so that every identifier (held in tuple[0]) is represented by a single
 * tuple: the one with the highest confidence level stored in tuple[confidenceLevelPosition].
 * Bags that are {@code null} or hold at most one element are handed back untouched.
 *
 * @param tuple input whose first field is the {@link DataBag} with the tuples to be deduplicated
 * @return bag with the deduplicated tuples, the original bag when there is nothing to deduplicate,
 *         or {@code null} when the input is {@code null} or has no fields
 * @throws IOException propagated from tuple access or from the per-tuple update helper
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() == 0) {
        return null;
    }
    final DataBag candidates = (DataBag) tuple.get(0);
    if (candidates == null || candidates.size() <= 1) {
        // null, empty and single-element bags need no deduplication and pass through as they are
        return candidates;
    }
    // a sorted map, so the surviving tuples come out ordered by their map key
    final Map<String, Tuple> survivorsByKey = new TreeMap<String, Tuple>();
    for (Tuple candidate : candidates) {
        updateStoredTupleWhenConfidenceLevelHigher(candidate, survivorsByKey);
    }
    final ArrayList<Tuple> survivors = new ArrayList<Tuple>(survivorsByKey.values());
    return BagFactory.getInstance().newDefaultBag(survivors);
}
 
开发者ID:openaire,项目名称:iis,代码行数:26,代码来源:IdConfidenceTupleDeduplicator.java

示例2: accumulate

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: hands the bag found at field 0 of the input to {@code updateUnion},
 * creating the running union on first use. Inputs that are {@code null}, do not have exactly one
 * field, or carry a {@code null} or empty bag are ignored.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 * @throws IOException propagated from tuple access or from the union update
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // one-time log entry revealing that Pig picked the accumulator code path
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  final boolean hasExactlyOneField = (inputTuple != null) && (inputTuple.size() == 1);
  if (!hasExactlyOneField) {
    return;
  }
  final DataBag inputBag = (DataBag) inputTuple.get(0);
  final boolean nothingToMerge = (inputBag == null) || (inputBag.size() == 0);
  if (nothingToMerge) {
    return;
  }
  if (union_ == null) {
    // created lazily, only once there is actually something to merge
    union_ = new Union<S>(sketchSize_, summaryFactory_);
  }
  updateUnion(inputBag, union_);
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:18,代码来源:UnionSketch.java

示例3: accumulate

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: hands the bag found at field 0 of the input to {@code updateUnion},
 * building the running union on first use. Inputs that are {@code null}, do not have exactly one
 * field, or carry a {@code null} or empty bag are ignored.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 * @throws IOException propagated from tuple access or from the union update
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // one-time log entry revealing that Pig picked the accumulator code path
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  final boolean hasExactlyOneField = (inputTuple != null) && (inputTuple.size() == 1);
  if (!hasExactlyOneField) {
    return;
  }
  final DataBag inputBag = (DataBag) inputTuple.get(0);
  final boolean nothingToMerge = (inputBag == null) || (inputBag.size() == 0);
  if (nothingToMerge) {
    return;
  }
  if (accumUnion_ == null) {
    // created lazily, only once there is actually something to merge
    accumUnion_ = new ArrayOfDoublesSetOperationBuilder()
        .setNominalEntries(sketchSize_)
        .setNumberOfValues(numValues_)
        .buildUnion();
  }
  updateUnion(inputBag, accumUnion_);
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:19,代码来源:UnionArrayOfDoublesSketchBase.java

示例4: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Passes a non-empty bag through unchanged and turns everything else into {@code null}.
 *
 * @param tuple input expected to hold exactly one field, a {@link DataBag}
 * @return the bag from field 0 when it contains at least one element; {@code null} when the
 *         input is {@code null}, does not have exactly one field, or the bag is {@code null}
 *         or empty
 * @throws IOException propagated from tuple access
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    final boolean hasExactlyOneField = tuple != null && tuple.size() == 1;
    if (!hasExactlyOneField) {
        return null;
    }
    final DataBag bag = (DataBag) tuple.get(0);
    return (bag != null && bag.size() != 0) ? bag : null;
}
 
开发者ID:openaire,项目名称:iis,代码行数:12,代码来源:EmptyBagToNull.java

示例5: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * result as a compact byte array.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (its tuples are passed to
 * {@code updateUnion}) or a {@link DataByteArray} (treated as a serialized sketch). {@code null}
 * fields and empty inner bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return the serialized union result, or the empty sketch when the input is {@code null}, has no
 *         fields, or its field 0 is {@code null}
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override
public DataByteArray exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("Algebraic was used");
    isFirstCall_ = false;
  }
  final boolean noInput = (inputTuple == null) || (inputTuple.size() == 0);
  if (noInput) {
    return getEmptySketch();
  }
  final Union merged = new Union(lgK_);
  final DataBag outerBag = (DataBag) inputTuple.get(0);
  if (outerBag == null) {
    return getEmptySketch();
  }
  for (final Tuple dataTuple : outerBag) {
    final Object field0 = dataTuple.get(0); // inputTuple.bag0.dataTupleN.f0
    if (field0 == null) {
      continue;
    }
    if (field0 instanceof DataBag) {
      // A bag here comes from system bagged outputs of multiple mapper Initial functions
      // (the Intermediate stage was bypassed); all of its inner tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, merged);
      }
      continue;
    }
    if (!(field0 instanceof DataByteArray)) { // we should never get here
      throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
          + field0.getClass().getName());
    }
    // A byte array is assumed to be a sketch coming from system bagged outputs of multiple
    // mapper Intermediate functions; each such sketch is merged into the union.
    final DataByteArray serializedSketch = (DataByteArray) field0;
    merged.update(HllSketch.wrap(Memory.wrap(serializedSketch.get())));
  }
  return new DataByteArray(merged.getResult(tgtHllType_).toCompactByteArray());
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:41,代码来源:AlgebraicFinal.java

示例6: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * result, serialized as a compact byte array, wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (its tuples are passed to
 * {@code updateUnion}) or a {@link DataByteArray} (treated as a serialized sketch). {@code null}
 * fields and empty inner bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return tuple holding the serialized union result, or the empty-sketch tuple when the input is
 *         {@code null}, has no fields, or its field 0 is {@code null}
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("Algebraic was used");
    isFirstCall_ = false;
  }
  final boolean noInput = (inputTuple == null) || (inputTuple.size() == 0);
  if (noInput) {
    return getEmptySketchTuple();
  }
  final DataBag outerBag = (DataBag) inputTuple.get(0);
  if (outerBag == null) {
    return getEmptySketchTuple();
  }
  final Union merged = new Union(lgK_);
  for (final Tuple dataTuple : outerBag) {
    final Object field0 = dataTuple.get(0); // inputTuple.bag0.dataTupleN.f0
    if (field0 == null) {
      continue;
    }
    if (field0 instanceof DataBag) {
      // A bag here comes from system bagged outputs of multiple mapper Initial functions
      // (the Intermediate stage was bypassed); all of its inner tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, merged);
      }
      continue;
    }
    if (!(field0 instanceof DataByteArray)) { // we should never get here
      throw new IllegalArgumentException("dataTuple.Field0 is not a DataBag or DataByteArray: "
          + field0.getClass().getName());
    }
    // A byte array is assumed to be a sketch coming from system bagged outputs of multiple
    // mapper Intermediate functions; each such sketch is merged into the union.
    final DataByteArray serializedSketch = (DataByteArray) field0;
    merged.update(HllSketch.wrap(Memory.wrap(serializedSketch.get())));
  }
  final DataByteArray serializedResult =
      new DataByteArray(merged.getResult(tgtHllType_).toCompactByteArray());
  return tupleFactory_.newTuple(serializedResult);
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:39,代码来源:AlgebraicIntermediate.java

示例7: accumulate

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Accumulator entry point: deserializes a sketch from every usable tuple of the bag at field 0
 * and merges it into the running sketch, which is created on first use.
 *
 * <p>The call is a no-op when the input is {@code null}, does not have exactly one field, when
 * that field is not a {@link DataBag}, or when the bag is empty. Inner tuples that do not have
 * exactly one non-null field are skipped.
 *
 * @param inputTuple tuple expected to hold a single {@link DataBag} field
 * @throws IOException propagated from tuple access
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
  if (isFirstCall_) {
    // one-time log entry revealing that Pig picked the accumulator code path
    Logger.getLogger(getClass()).info("accumulator is used");
    isFirstCall_ = false;
  }
  final boolean hasExactlyOneField = (inputTuple != null) && (inputTuple.size() == 1);
  if (!hasExactlyOneField) {
    return;
  }
  final Object field0 = inputTuple.get(0);
  if (!(field0 instanceof DataBag)) {
    return;
  }
  final DataBag bag = (DataBag) field0;
  if (bag.size() == 0) {
    return;
  }

  if (sketch_ == null) {
    sketch_ = new ItemsSketch<T>(sketchSize_);
  }
  for (final Tuple innerTuple : bag) {
    final boolean usable = (innerTuple.size() == 1) && (innerTuple.get(0) != null);
    if (usable) {
      final ItemsSketch<T> incoming = Util.deserializeSketchFromTuple(innerTuple, serDe_);
      sketch_.merge(incoming);
    }
  }
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:32,代码来源:UnionFrequentItemsSketch.java

示例8: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (its tuples are passed to
 * {@code updateUnion}) or a {@link DataByteArray} (treated as a serialized sketch). {@code null}
 * fields and empty inner bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return tuple holding the serialized union result; a serialized empty sketch when the input is
 *         {@code null}, has no fields, its field 0 is {@code null}, or the union yields no result
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException {
  final DataBag outerBag = (inputTuple != null && inputTuple.size() > 0)
      ? (DataBag) inputTuple.get(0)
      : null;
  // A null outer bag used to fail with a NullPointerException in the loop below; it is now
  // treated like missing input and falls through to the empty-sketch result.
  if (outerBag != null) {
    final DoublesUnion union = unionBuilder_.build();
    for (final Tuple dataTuple: outerBag) {
      final Object f0 = dataTuple.get(0);
      if (f0 == null) { continue; }
      if (f0 instanceof DataBag) {
        final DataBag innerBag = (DataBag) f0; //inputTuple.bag0.dataTupleN.f0:bag
        if (innerBag.size() == 0) { continue; }
        // If field 0 of a dataTuple is again a Bag all tuples of this inner bag
        // will be passed into the union.
        // It is due to system bagged outputs from multiple mapper Initial functions.
        // The Intermediate stage was bypassed.
        updateUnion(innerBag, union);
      } else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
        // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch from a prior call
        // It is due to system bagged outputs from multiple mapper Intermediate functions.
        // Each dataTuple.DBA:sketch will merged into the union.
        final DataByteArray dba = (DataByteArray) f0;
        union.update(Memory.wrap(dba.get()));
      } else {
        throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
          + f0.getClass().getName());
      }
    }
    final DoublesSketch resultSketch = union.getResultAndReset();
    if (resultSketch != null) {
      return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(true)));
    }
  }
  // return empty sketch
  return tupleFactory_.newTuple(new DataByteArray(unionBuilder_.build().getResult().toByteArray(true)));
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:36,代码来源:UnionDoublesSketch.java

示例9: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (field 0 of each of its
 * tuples is cast to {@link Double} and fed to the union) or a {@link DataByteArray} (treated as a
 * serialized sketch). {@code null} fields and empty inner bags are skipped.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return tuple holding the serialized union result; a serialized empty sketch when the input is
 *         {@code null}, has no fields, its field 0 is {@code null}, or the union yields no result
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  final DataBag outerBag = (inputTuple != null && inputTuple.size() > 0)
      ? (DataBag) inputTuple.get(0)
      : null;
  // A null outer bag used to fail with a NullPointerException in the loop below; it is now
  // treated like missing input and falls through to the empty-sketch result.
  if (outerBag != null) {
    final DoublesUnion union = unionBuilder_.build();
    for (final Tuple dataTuple: outerBag) {
      final Object f0 = dataTuple.get(0);
      if (f0 == null) { continue; }
      if (f0 instanceof DataBag) {
        final DataBag innerBag = (DataBag) f0; // inputTuple.bag0.dataTupleN.f0:bag
        if (innerBag.size() == 0) { continue; }
        // If field 0 of a dataTuple is a Bag all innerTuples of this inner bag
        // will be passed into the union.
        // It is due to system bagged outputs from multiple mapper Initial functions.
        // The Intermediate stage was bypassed.
        for (final Tuple innerTuple: innerBag) {
          // NOTE(review): if update takes a primitive double, a null field 0 would fail on
          // unboxing here -- confirm that upstream never emits null values.
          union.update((Double) innerTuple.get(0));
        }
      } else if (f0 instanceof DataByteArray) { // inputTuple.bag0.dataTupleN.f0:DBA
        // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch
        // due to system bagged outputs from multiple mapper Intermediate functions.
        // Each dataTuple.DBA:sketch will merged into the union.
        final DataByteArray dba = (DataByteArray) f0;
        union.update(Memory.wrap(dba.get()));
      } else {
        throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
            + f0.getClass().getName());
      }
    }
    final DoublesSketch resultSketch = union.getResultAndReset();
    if (resultSketch != null) {
      return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(true)));
    }
  }
  // return empty sketch
  return tupleFactory_.newTuple(new DataByteArray(unionBuilder_.build().getResult().toByteArray(true)));
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:38,代码来源:DataToDoublesSketch.java

示例10: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (its tuples are passed to
 * {@code updateUnion}) or a {@link DataByteArray} (treated as a serialized sketch). {@code null}
 * fields and empty inner bags are skipped. The union and the empty sketch are created with
 * {@code k_} when it is positive and without it otherwise.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return tuple holding the serialized union result; a serialized empty sketch when the input is
 *         {@code null}, has no fields, its field 0 is {@code null}, or the union yields no result
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException {
  final DataBag outerBag = (inputTuple != null && inputTuple.size() > 0)
      ? (DataBag) inputTuple.get(0)
      : null;
  // A null outer bag used to fail with a NullPointerException in the loop below; it is now
  // treated like missing input and falls through to the empty-sketch result.
  if (outerBag != null) {
    final ItemsUnion<T> union = k_ > 0
        ? ItemsUnion.getInstance(k_, comparator_)
        : ItemsUnion.getInstance(comparator_);
    for (final Tuple dataTuple: outerBag) {
      final Object f0 = dataTuple.get(0);
      if (f0 == null) { continue; }
      if (f0 instanceof DataBag) {
        final DataBag innerBag = (DataBag) f0; //inputTuple.bag0.dataTupleN.f0:bag
        if (innerBag.size() == 0) { continue; }
        // If field 0 of a dataTuple is again a Bag all tuples of this inner bag
        // will be passed into the union.
        // It is due to system bagged outputs from multiple mapper Initial functions.
        // The Intermediate stage was bypassed.
        updateUnion(innerBag, union, comparator_, serDe_);
      } else if (f0 instanceof DataByteArray) { //inputTuple.bag0.dataTupleN.f0:DBA
        // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch from a prior call
        // It is due to system bagged outputs from multiple mapper Intermediate functions.
        // Each dataTuple.DBA:sketch will merged into the union.
        final DataByteArray dba = (DataByteArray) f0;
        union.update(ItemsSketch.getInstance(Memory.wrap(dba.get()), comparator_, serDe_));
      } else {
        throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
          + f0.getClass().getName());
      }
    }
    final ItemsSketch<T> resultSketch = union.getResultAndReset();
    if (resultSketch != null) {
      return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(serDe_)));
    }
  }
  // return empty sketch
  final ItemsSketch<T> sketch = k_ > 0
      ? ItemsSketch.getInstance(k_, comparator_)
      : ItemsSketch.getInstance(comparator_);
  return tupleFactory_.newTuple(new DataByteArray(sketch.toByteArray(serDe_)));
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:41,代码来源:UnionItemsSketch.java

示例11: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Short-circuits the parent implementation when the whole input bag holds no more than
 * {@code targetK_} tuples: the bag itself is then used to build the result.
 *
 * @param inputTuple tuple whose field 0 is the bag of samples
 * @return {@code null} when the input is {@code null}, has no fields, or field 0 is null;
 *         otherwise the tuple built by {@code createResultTuple} for small bags, or the result of
 *         the superclass {@code exec} for larger ones
 * @throws IOException propagated from tuple access or from the superclass
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  final boolean hasBag =
      (inputTuple != null) && (inputTuple.size() >= 1) && !inputTuple.isNull(0);
  if (!hasBag) {
    return null;
  }

  final DataBag samples = (DataBag) inputTuple.get(0);
  final long numSamples = samples.size();

  if (numSamples > targetK_) {
    return super.exec(inputTuple);
  }
  // the entire input fits in the reservoir, so the result can be built from it directly
  return createResultTuple(numSamples, targetK_, samples);
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:15,代码来源:ReservoirSampling.java

示例12: dataBagToArrayList

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Copies the tuples of a bag, in iteration order, into a newly allocated list.
 *
 * @param bag source of the tuples; must not be {@code null}
 * @return a new list holding every tuple of {@code bag}
 */
static ArrayList<Tuple> dataBagToArrayList(final DataBag bag) {
  // the bag reports its size as a long; it is narrowed to int for the initial capacity
  final ArrayList<Tuple> copy = new ArrayList<>((int) bag.size());
  for (final Tuple element : bag) {
    copy.add(element);
  }
  return copy;
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:11,代码来源:ReservoirSampling.java

示例13: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Builds multi-token entities from a bag of tokens and a parallel bag of named-entity tags.
 *
 * <p>Tokens tagged {@code "I-ORG"} or {@code "ORG"} are appended, separated by single spaces, to
 * the entity currently being composed. The composed entity is emitted (trimmed) and reset when an
 * {@code "O"} tag or the last tag is reached. Any other tag neither extends nor ends the entity.
 *
 * @param input tuple whose field 0 is a tuple holding the token bag at position 0 and the tag bag
 *        at position 1; field 0 of every inner tuple is read as a {@link String}
 * @return bag with one single-field tuple per composed entity, or {@code null} when the input is
 *         {@code null} or has no fields
 * @throws IOException propagated from tuple access
 */
@Override
public DataBag exec(Tuple input) throws IOException {
	if (input == null || input.size() == 0) {
		return null;
	}

	Tuple t1 = (Tuple) input.get(0);
	DataBag tokensBag = (DataBag) t1.get(0);
	DataBag neBag = (DataBag) t1.get(1);

	String[] tokens = new String[(int) tokensBag.size()];
	int ti = 0;
	for (Tuple token : tokensBag) {
		tokens[ti++] = (String) token.get(0);
	}

	String[] ne = new String[(int) neBag.size()];
	int ni = 0;
	for (Tuple neTag : neBag) {
		ne[ni++] = (String) neTag.get(0);
	}

	DataBag namedEntities = bagFactory.newDefaultBag();
	StringBuilder composedToken = new StringBuilder();

	// assumes the tag bag is aligned with, and no longer than, the token bag -- TODO confirm
	for (int i = 0; i < ne.length; i++) {
		// constant-first equals keeps a null tag from raising a NullPointerException
		String tag = ne[i];

		if ("I-ORG".equals(tag) || "ORG".equals(tag)) {
			composedToken.append(tokens[i]).append(' ');
		}

		if ("O".equals(tag) || i == ne.length - 1) {
			// a length check replaces the former reference comparison against ""
			if (composedToken.length() > 0) {
				namedEntities.add(tupleFactory.newTuple(composedToken.toString().trim()));
			}
			composedToken.setLength(0);
		}
	}

	return namedEntities;
}
 
开发者ID:news-sentiment,项目名称:news-sentiment-pig,代码行数:55,代码来源:NamedEntity.java

示例14: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the bag at field 0 of the input into one union and returns the
 * serialized result wrapped in a tuple.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (field 0 of each of its
 * tuples is converted with {@code extractValue} and fed to the union) or a {@link DataByteArray}
 * (treated as a serialized sketch). {@code null} fields and empty inner bags are skipped. The
 * union and the empty sketch are created with {@code k_} when it is positive and without it
 * otherwise.
 *
 * @param inputTuple tuple whose field 0 is the outer bag
 * @return tuple holding the serialized union result; a serialized empty sketch when the input is
 *         {@code null}, has no fields, its field 0 is {@code null}, or the union yields no result
 * @throws IOException propagated from tuple access
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override // IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  final DataBag outerBag = (inputTuple != null && inputTuple.size() > 0)
      ? (DataBag) inputTuple.get(0)
      : null;
  // A null outer bag used to fail with a NullPointerException in the loop below; it is now
  // treated like missing input and falls through to the empty-sketch result.
  if (outerBag != null) {
    final ItemsUnion<T> union = k_ > 0
        ? ItemsUnion.getInstance(k_, comparator_)
        : ItemsUnion.getInstance(comparator_);
    for (final Tuple dataTuple: outerBag) {
      final Object f0 = dataTuple.get(0);
      if (f0 == null) { continue; }
      if (f0 instanceof DataBag) {
        final DataBag innerBag = (DataBag) f0; // inputTuple.bag0.dataTupleN.f0:bag
        if (innerBag.size() == 0) { continue; }
        // If field 0 of a dataTuple is a Bag all innerTuples of this inner bag
        // will be passed into the union.
        // It is due to system bagged outputs from multiple mapper Initial functions.
        // The Intermediate stage was bypassed.
        for (final Tuple innerTuple: innerBag) {
          union.update(extractValue(innerTuple.get(0)));
        }
      } else if (f0 instanceof DataByteArray) { // inputTuple.bag0.dataTupleN.f0:DBA
        // If field 0 of a dataTuple is a DataByteArray we assume it is a sketch
        // due to system bagged outputs from multiple mapper Intermediate functions.
        // Each dataTuple.DBA:sketch will merged into the union.
        final DataByteArray dba = (DataByteArray) f0;
        union.update(ItemsSketch.getInstance(Memory.wrap(dba.get()), comparator_, serDe_));
      } else {
        throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
            + f0.getClass().getName());
      }
    }
    final ItemsSketch<T> resultSketch = union.getResultAndReset();
    if (resultSketch != null) {
      return tupleFactory_.newTuple(new DataByteArray(resultSketch.toByteArray(serDe_)));
    }
  }
  // return empty sketch
  final ItemsSketch<T> sketch = k_ > 0
      ? ItemsSketch.getInstance(k_, comparator_)
      : ItemsSketch.getInstance(comparator_);
  return tupleFactory_.newTuple(new DataByteArray(sketch.toByteArray(serDe_)));
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:43,代码来源:DataToItemsSketch.java

示例15: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges everything found in the outer bag of the input into one union and returns the result
 * converted to a tuple by {@code compactOrderedSketchToTuple}.
 *
 * <p>Field 0 of each tuple in the outer bag may be a {@link DataBag} (its tuples are passed to
 * {@code updateUnion}) or a {@link DataByteArray} (treated as a serialized sketch). {@code null}
 * fields and empty inner bags are skipped.
 *
 * @param inputTuple tuple from which the outer bag is obtained via {@code extractBag}
 * @return tuple built from the union result, or the pre-built empty sketch tuple when no outer
 *         bag could be extracted
 * @throws IOException declared by the API
 * @throws IllegalArgumentException if field 0 of a data tuple is neither a bag nor a byte array
 */
@Override //IntermediateFinal exec
public Tuple exec(final Tuple inputTuple) throws IOException { //throws is in API
  final com.yahoo.sketches.theta.Union merged = SetOperation.builder()
      .setP(myP_)
      .setSeed(mySeed_)
      .setResizeFactor(RF)
      .setNominalEntries(myNomEntries_)
      .buildUnion();

  final DataBag outerBag = extractBag(inputTuple); // inputTuple.bag0
  if (outerBag == null) {
    // without an outer bag at field 0 there is nothing to merge
    return myEmptyCompactOrderedSketchTuple_;
  }

  for (final Tuple dataTuple : outerBag) {
    final Object field0 = extractFieldAtIndex(dataTuple, 0); // inputTuple.bag0.dataTupleN.f0
    if (field0 == null) {
      continue; // a null field contributes nothing; move on to the next data tuple
    }
    if (field0 instanceof DataBag) {
      // A bag here comes from system bagged outputs of multiple mapper Initial functions
      // (the Intermediate stage was bypassed); all of its tuples go into the union.
      final DataBag innerBag = (DataBag) field0;
      if (innerBag.size() != 0) {
        updateUnion(innerBag, merged);
      }
      continue;
    }
    if (!(field0 instanceof DataByteArray)) { // we should never get here.
      throw new IllegalArgumentException("dataTuple.Field0: Is not a DataByteArray: "
          + field0.getClass().getName());
    }
    // A byte array is assumed to be a sketch from a prior call, coming from system bagged
    // outputs of multiple mapper Intermediate functions; it is merged into the union.
    final DataByteArray serializedSketch = (DataByteArray) field0;
    final Memory sketchMemory = Memory.wrap(serializedSketch.get());
    merged.update(sketchMemory);
  }

  final CompactSketch mergedSketch = merged.getResult(true, null);
  return compactOrderedSketchToTuple(mergedSketch);
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:50,代码来源:Union.java


注:本文中的org.apache.pig.data.DataBag.size方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。