本文整理汇总了Java中org.apache.pig.data.DataBag.iterator方法的典型用法代码示例。如果您正苦于以下问题：Java DataBag.iterator方法的具体用法？Java DataBag.iterator怎么用？Java DataBag.iterator使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.pig.data.DataBag的用法示例。
在下文中一共展示了DataBag.iterator方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: binData
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Builds a histogram of the values held in a bag.
 *
 * <p>Field 0 of every tuple in {@code values} is read as a {@link Number}, shifted by {@code min},
 * and assigned to bin {@code floor((value - min) / width)}. The result is a tuple with one
 * {@code long} count per bin.
 *
 * @param values bag whose tuples carry the number to bin in field 0
 * @param nbins  number of bins in the histogram
 * @param width  width of each bin
 * @param min    value that maps to the lower edge of bin 0
 * @return a tuple of {@code nbins} fields, field {@code i} holding the count for bin {@code i}
 * @throws ExecException wrapping any failure raised while binning, e.g. a value that falls outside
 *                       {@code [min, min + nbins * width)} or a field 0 that is not a {@link Number}
 */
protected static Tuple binData(DataBag values, int nbins, int width, double min) throws ExecException {
    try {
        long[] bins = new long[nbins];
        for (Tuple next : values) {
            // Shift by the min so that the first bin starts at zero.
            double shifted = ((Number) next.get(0)).doubleValue() - min;
            int bin = (int) Math.floor(shifted / width);
            bins[bin]++;
        }
        Tuple tuple = tupleFactory.newTuple(bins.length);
        for (int i = 0; i < bins.length; i++) {
            tuple.set(i, bins[i]);
        }
        return tuple;
    } catch (Exception e) {
        log.warn("Exception while processing bins={}, width={}, values={}", new Object[] { nbins, width, values });
        // Wrap the caught exception itself: e.getCause() is null for the usual failures here
        // (index out of range, bad cast), which would leave the caller with no diagnostic at all.
        throw new ExecException(e);
    }
}
示例2: approximateCompareBags
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Compares two bags tuple by tuple, in iteration order, using an absolute tolerance.
 *
 * <p>Fields 0, 2, 3 and 4 of each tuple pair are read as {@link Double} and must differ by no more
 * than the tolerance. Bags holding a different number of tuples are never considered equal.
 *
 * @param inputBag1 first bag to compare
 * @param inputBag2 second bag to compare
 * @return {@code true} when both bags have the same number of tuples and every compared field is
 *         within the tolerance, {@code false} otherwise
 * @throws ExecException if a field cannot be read from a tuple
 */
private Boolean approximateCompareBags(DataBag inputBag1, DataBag inputBag2) throws ExecException {
    // Hardcode Acceptable Error
    final double errorLimit = 0.0000001;
    // TODO: Add unit test for differenced case
    // Field 1 is intentionally left out of the comparison until that test exists.
    final int[] comparedFields = { 0, 2, 3, 4 };

    Iterator<Tuple> iter1 = inputBag1.iterator();
    Iterator<Tuple> iter2 = inputBag2.iterator();
    while (iter1.hasNext() && iter2.hasNext()) {
        Tuple tuple1 = iter1.next();
        Tuple tuple2 = iter2.next();
        // Check error
        for (int field : comparedFields) {
            double difference = (Double) tuple1.get(field) - (Double) tuple2.get(field);
            if (Math.abs(difference) > errorLimit) {
                return false;
            }
        }
    }
    // If either iterator still has tuples, the bags differ in length and cannot match.
    return !iter1.hasNext() && !iter2.hasNext();
}
示例3: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges all bags held in the fields of the input tuple into a single bag, keeping only the first
 * occurrence of each tuple (as decided by {@code List.contains}).
 *
 * @param tuple tuple whose non-null fields are each cast to {@link DataBag}
 * @return a bag with the distinct tuples in first-seen order, or {@code null} when the input is
 *         null, has no fields, or yields no tuples
 * @throws IOException if a field cannot be read from the input tuple
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() == 0) {
        return null;
    }

    List<Tuple> distinct = new ArrayList<Tuple>();
    for (int fieldIndex = 0; fieldIndex < tuple.size(); fieldIndex++) {
        Object field = tuple.get(fieldIndex);
        if (field == null) {
            // Null fields contribute nothing to the union.
            continue;
        }
        for (Tuple candidate : (DataBag) field) {
            if (!distinct.contains(candidate)) {
                distinct.add(candidate);
            }
        }
    }

    if (distinct.isEmpty()) {
        return null;
    }
    return BagFactory.getInstance().newDefaultBag(Lists.newArrayList(distinct));
}
示例4: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Collapses a group of tuples so that, per identifier, only the tuple with the highest confidence
 * level survives. As originally documented, the identifier lives in tuple[0] and the confidence
 * level in tuple[confidenceLevelPosition]; the selection itself is delegated to
 * {@code updateStoredTupleWhenConfidenceLevelHigher}.
 *
 * <p>Bags that are null or hold at most one tuple are returned untouched. Otherwise the survivors
 * come back in the key order of the backing {@link TreeMap}.
 *
 * @param tuple tuple whose field 0 is the {@link DataBag} holding the group to deduplicate
 * @return the deduplicated bag, the original bag when there is nothing to deduplicate, or
 *         {@code null} when the input tuple is null or empty
 * @throws IOException if the input tuple cannot be read
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() == 0) {
        return null;
    }

    DataBag candidates = (DataBag) tuple.get(0);
    // Deduplication only makes sense when there is more than one element.
    if (candidates == null || candidates.size() <= 1) {
        return candidates;
    }

    Map<String, Tuple> bestByIdentifier = new TreeMap<String, Tuple>();
    for (Tuple candidate : candidates) {
        updateStoredTupleWhenConfidenceLevelHigher(candidate, bestByIdentifier);
    }
    ArrayList<Tuple> survivors = new ArrayList<Tuple>(bestByIdentifier.values());
    return BagFactory.getInstance().newDefaultBag(survivors);
}
示例5: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Subtracts one bag from another: starts from all tuples of the first bag and, for every tuple of
 * the second bag, removes one matching occurrence (first match, via {@code List.remove(Object)}).
 *
 * @param tuple two-field tuple: field 0 is the main bag, field 1 the bag to subtract
 * @return a bag with the remaining tuples; the main bag itself when either bag is null; or
 *         {@code null} when the input is null, does not have exactly two fields, or nothing remains
 * @throws IOException if a field cannot be read from the input tuple
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() != 2) {
        return null;
    }

    DataBag mainBag = (DataBag) tuple.get(0);
    DataBag subtractedBag = (DataBag) tuple.get(1);
    if (mainBag == null || subtractedBag == null) {
        return mainBag;
    }

    List<Tuple> remaining = new ArrayList<Tuple>();
    for (Tuple kept : mainBag) {
        remaining.add(kept);
    }
    for (Tuple unwanted : subtractedBag) {
        remaining.remove(unwanted);
    }

    return remaining.isEmpty() ? null : BagFactory.getInstance().newDefaultBag(remaining);
}
示例6: testExec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Feeds a two-tuple bag to the UDF under test and expects each output tuple to carry its
 * zero-based position in field 0 followed by the original value in field 1.
 */
@Test
public void testExec() throws Exception {
    // given
    String[] expectedNames = { "tuple1", "tuple2" };
    TupleFactory tuples = TupleFactory.getInstance();
    BagFactory bags = BagFactory.getInstance();
    DataBag sourceBag = bags.newDefaultBag(Lists.newArrayList(
            tuples.newTuple(expectedNames[0]),
            tuples.newTuple(expectedNames[1])));

    // execute
    DataBag resultBag = udf.exec(tuples.newTuple(sourceBag));

    // assert
    assertNotNull(resultBag);
    Iterator<Tuple> results = resultBag.iterator();
    for (int position = 0; position < expectedNames.length; position++) {
        assertTrue(results.hasNext());
        Tuple numbered = results.next();
        assertEquals(position, numbered.get(0));
        assertEquals(expectedNames[position], numbered.get(1));
    }
    assertFalse(results.hasNext());
}
示例7: exact
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Serializes a sketch that saw "a" twice and "b" once, runs it through
 * {@code FrequentStringsSketchToEstimates}, and expects two four-field tuples, "a" first, whose
 * fields 1 to 3 all equal the exact count of the item.
 */
@Test
public void exact() throws Exception {
    EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
    ItemsSketch<String> sketch = new ItemsSketch<String>(8);
    for (String item : new String[] { "a", "a", "b" }) {
        sketch.update(item);
    }
    byte[] serialized = sketch.toByteArray(new ArrayOfStringsSerDe());
    Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(serialized));

    DataBag bag = func.exec(inputTuple);
    Assert.assertNotNull(bag);
    Assert.assertEquals(bag.size(), 2);

    String[] expectedItems = { "a", "b" };
    long[] expectedCounts = { 2L, 1L };
    Iterator<Tuple> rows = bag.iterator();
    for (int i = 0; i < expectedItems.length; i++) {
        Tuple row = rows.next();
        Assert.assertEquals(row.size(), 4);
        Assert.assertEquals((String) row.get(0), expectedItems[i]);
        // Fields 1..3 are each expected to equal the exact count of the item.
        for (int field = 1; field <= 3; field++) {
            Assert.assertEquals((long) row.get(field), expectedCounts[i]);
        }
    }
}
示例8: exec
import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Collects the tokens tagged as organizations into a bag of entity strings.
 *
 * <p>Field 0 of the input is a tuple holding two bags: the tokens (field 0) and their named-entity
 * tags (field 1), matched by position. Consecutive tokens tagged {@code "I-ORG"} or {@code "ORG"}
 * are joined with single spaces; the pending entity is emitted when an {@code "O"} tag is met or
 * when the last tag has been processed. Any other tag neither extends nor ends the pending entity.
 *
 * @param input tuple whose field 0 is a tuple of (tokens bag, tags bag); field 0 of every tuple in
 *              both bags is read as a {@link String}
 * @return a bag with one single-field tuple per extracted entity (possibly empty), or {@code null}
 *         when the input is null or has no fields
 * @throws IOException if a field cannot be read from a tuple
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }
    DataBag namedEntities = bagFactory.newDefaultBag();
    Tuple t1 = (Tuple) input.get(0);
    DataBag tokensBag = (DataBag) t1.get(0);
    DataBag neBag = (DataBag) t1.get(1);

    // Materialize both bags into arrays so tokens and tags can be matched by index.
    String[] tokens = new String[(int) tokensBag.size()];
    int ti = 0;
    for (Tuple token : tokensBag) {
        tokens[ti++] = (String) token.get(0);
    }
    String[] ne = new String[(int) neBag.size()];
    int ni = 0;
    for (Tuple neTag : neBag) {
        ne[ni++] = (String) neTag.get(0);
    }

    StringBuilder composedToken = new StringBuilder();
    for (int i = 0; i < ne.length; i++) {
        if (ne[i].equals("I-ORG") || ne[i].equals("ORG")) {
            composedToken.append(tokens[i]).append(' ');
        }
        if (ne[i].equals("O") || i == ne.length - 1) {
            // Test the content, not the reference: the previous != "" check only worked
            // because of string-literal interning.
            if (composedToken.length() > 0) {
                namedEntities.add(tupleFactory.newTuple(composedToken.toString().trim()));
            }
            composedToken.setLength(0);
        }
    }
    return namedEntities;
}