当前位置: 首页>>代码示例>>Java>>正文


Java DataBag.iterator方法代码示例

本文整理汇总了Java中org.apache.pig.data.DataBag.iterator方法的典型用法代码示例。如果您正苦于以下问题:Java DataBag.iterator方法的具体用法?Java DataBag.iterator怎么用?Java DataBag.iterator使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.pig.data.DataBag的用法示例。


在下文中一共展示了DataBag.iterator方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: binData

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Builds a fixed-width histogram from the first field of every tuple in {@code values}.
 *
 * <p>Each sample is shifted by {@code min} and counted in bin
 * {@code floor((sample - min) / width)}.
 *
 * @param values bag of tuples; field 0 of each tuple is cast to {@link Number}
 * @param nbins  number of bins to allocate
 * @param width  width of a single bin
 * @param min    value subtracted from every sample before binning
 * @return a tuple created with {@code nbins} fields where field {@code i} is set to the count of bin {@code i}
 * @throws ExecException if binning fails for any reason (for example a sample that maps outside
 *         {@code [0, nbins)}, or a field that is not a {@link Number}); the triggering exception
 *         is attached as the cause
 */
protected static Tuple binData(DataBag values, int nbins, int width, double min) throws ExecException {
  try {
    // Diagnostic output goes through the logger instead of unconditionally writing to stderr.
    log.debug("Binning values={}, nbins={}, width={}, min={}", new Object[] { values, nbins, width, min });
    long[] bins = new long[nbins];
    for (Tuple next : values) {
      double val = ((Number) next.get(0)).doubleValue() - min; // Shift by the min
      int bin = (int) Math.floor(val / width);
      bins[bin]++;
    }
    Tuple tuple = tupleFactory.newTuple(bins.length);
    for (int i = 0; i < bins.length; i++) {
      tuple.set(i, bins[i]);
    }
    return tuple;
  } catch (Exception e) {
    // Pass the exception as the trailing argument so the logger can record its stack trace.
    log.warn("Exception while processing bins={}, width={}, values={}", new Object[] { nbins, width, values, e });
    // Wrap the exception itself: e.getCause() is null for most runtime failures
    // (e.g. ArrayIndexOutOfBoundsException), which previously discarded the real error.
    throw new ExecException(e);
  }
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:24,代码来源:Histogram.java

示例2: approximateCompareBags

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Compares two bags tuple by tuple, treating {@code Double} fields as equal when they differ
 * by no more than a small hard-coded tolerance.
 *
 * <p>Tuples are paired in iteration order. Fields 0, 2, 3 and 4 of each pair are compared;
 * field 1 is currently not compared (that check is disabled pending a unit test for the
 * differenced case).
 *
 * @param inputBag1 first bag
 * @param inputBag2 second bag
 * @return {@code true} if both bags yield the same number of tuples and every compared field is
 *         within tolerance; {@code false} otherwise
 * @throws ExecException if a field cannot be read from a tuple
 */
private Boolean approximateCompareBags(DataBag inputBag1, DataBag inputBag2) throws ExecException {

  // Hardcode Acceptable Error
  final double errorLimit = 0.0000001;

  // TODO: Add unit test for differenced case, then include field 1 here.
  final int[] comparedFields = { 0, 2, 3, 4 };

  Iterator<Tuple> iter1 = inputBag1.iterator();
  Iterator<Tuple> iter2 = inputBag2.iterator();
  while (iter1.hasNext()) {
    // A shorter second bag is a mismatch, not a NoSuchElementException.
    if (!iter2.hasNext()) {
      return false;
    }
    Tuple tuple1 = iter1.next();
    Tuple tuple2 = iter2.next();

    // Check error
    for (int field : comparedFields) {
      if (Math.abs((Double) tuple1.get(field) - (Double) tuple2.get(field)) > errorLimit) {
        return false;
      }
    }
  }

  // Leftover tuples in the second bag mean the bags differ in length.
  return !iter2.hasNext();
}
 
开发者ID:Netflix,项目名称:Surus,代码行数:24,代码来源:RAD_Test.java

示例3: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Merges the bags stored in the fields of {@code tuple} into a single bag without duplicates.
 *
 * <p>Null fields are skipped. Tuples keep the order in which they are first encountered;
 * a tuple is dropped when an equal one (per {@code List.contains}) was already collected.
 *
 * @param tuple tuple whose non-null fields are each cast to {@link DataBag}
 * @return a new bag with the distinct tuples, or {@code null} when the input is null/empty
 *         or no tuples were collected
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() == 0) {
        return null;
    }

    List<Tuple> merged = new ArrayList<Tuple>();
    for (int fieldIdx = 0; fieldIdx < tuple.size(); fieldIdx++) {
        Object field = tuple.get(fieldIdx);
        if (field == null) {
            continue;
        }
        for (Tuple candidate : (DataBag) field) {
            if (merged.contains(candidate)) {
                continue;
            }
            merged.add(candidate);
        }
    }

    return merged.isEmpty()
            ? null
            : BagFactory.getInstance().newDefaultBag(Lists.newArrayList(merged));
}
 
开发者ID:openaire,项目名称:iis,代码行数:26,代码来源:StringBagsMerger.java

示例4: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Deduplicates the tuples held in the bag at field 0 of {@code tuple}.
 *
 * <p>Per the contract of this UDF, tuples are grouped by the identifier stored in tuple[0] and
 * the one with the highest confidence level stored in tuple[confidenceLevelPosition] is kept;
 * the selection itself is delegated to {@code updateStoredTupleWhenConfidenceLevelHigher}.
 * Bags that are null or hold at most one element are returned untouched.
 *
 * @param tuple {@link DataBag} holding group of tuples to be deduplicated
 * @return {@code null} for a null/empty input tuple, the original bag when there is nothing to
 *         deduplicate, otherwise a new bag built from the values of a {@link TreeMap} keyed by
 *         {@code String}
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() == 0) {
        return null;
    }

    DataBag candidates = (DataBag) tuple.get(0);
    if (candidates == null || candidates.size() <= 1) {
        // nothing to deduplicate
        return candidates;
    }

    Map<String, Tuple> bestById = new TreeMap<String, Tuple>();
    for (Tuple candidate : candidates) {
        updateStoredTupleWhenConfidenceLevelHigher(candidate, bestById);
    }

    BagFactory bags = BagFactory.getInstance();
    return bags.newDefaultBag(new ArrayList<Tuple>(bestById.values()));
}
 
开发者ID:openaire,项目名称:iis,代码行数:26,代码来源:IdConfidenceTupleDeduplicator.java

示例5: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Computes the difference of two bags: the tuples of the bag at field 0 minus the tuples of
 * the bag at field 1.
 *
 * <p>Each tuple of the second bag removes at most one equal tuple (the first occurrence) from
 * the copy of the first bag, following {@code List.remove(Object)} semantics.
 *
 * @param tuple two-field tuple; both fields are cast to {@link DataBag}
 * @return {@code null} when the input is null, does not have exactly two fields, or the
 *         difference is empty; the first bag itself when either bag is null; otherwise a new
 *         bag with the remaining tuples
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    if (tuple == null || tuple.size() != 2) {
        return null;
    }

    DataBag minuend = (DataBag) tuple.get(0);
    DataBag subtrahend = (DataBag) tuple.get(1);
    if (minuend == null || subtrahend == null) {
        return minuend;
    }

    List<Tuple> remaining = new ArrayList<Tuple>();
    for (Tuple kept : minuend) {
        remaining.add(kept);
    }
    for (Tuple excluded : subtrahend) {
        remaining.remove(excluded);
    }

    return remaining.isEmpty() ? null : BagFactory.getInstance().newDefaultBag(remaining);
}
 
开发者ID:openaire,项目名称:iis,代码行数:28,代码来源:StringBagsDifference.java

示例6: testExec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Verifies that the UDF under test turns a bag of single-field tuples into a bag of
 * (index, value) tuples, with indexes starting at 0 and following input order.
 */
@Test
public void testExec() throws Exception {
    // given
    String[] expectedNames = { "tuple1", "tuple2" };
    TupleFactory tuples = TupleFactory.getInstance();
    Tuple firstInput = tuples.newTuple(expectedNames[0]);
    Tuple secondInput = tuples.newTuple(expectedNames[1]);
    DataBag inputBag = BagFactory.getInstance().newDefaultBag(Lists.newArrayList(firstInput, secondInput));

    // execute
    DataBag outputBag = udf.exec(tuples.newTuple(inputBag));

    // assert
    assertNotNull(outputBag);
    Iterator<Tuple> results = outputBag.iterator();
    for (int index = 0; index < expectedNames.length; index++) {
        assertTrue(results.hasNext());
        Tuple indexed = results.next();
        assertEquals(index, indexed.get(0));
        assertEquals(expectedNames[index], indexed.get(1));
    }

    // no extra tuples beyond the expected ones
    assertFalse(results.hasNext());
}
 
开发者ID:openaire,项目名称:iis,代码行数:31,代码来源:StringListToListWithIndexesTest.java

示例7: exact

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Feeds a sketch holding "a" twice and "b" once to {@code FrequentStringsSketchToEstimates}
 * and checks that the resulting bag has two 4-field tuples: ("a", 2, 2, 2) followed by
 * ("b", 1, 1, 1).
 */
@Test
public void exact() throws Exception {
  ItemsSketch<String> sketch = new ItemsSketch<String>(8);
  for (String item : new String[] { "a", "a", "b" }) {
    sketch.update(item);
  }
  byte[] serialized = sketch.toByteArray(new ArrayOfStringsSerDe());
  Tuple inputTuple = PigUtil.objectsToTuple(new DataByteArray(serialized));

  EvalFunc<DataBag> func = new FrequentStringsSketchToEstimates();
  DataBag bag = func.exec(inputTuple);
  Assert.assertNotNull(bag);
  Assert.assertEquals(bag.size(), 2);

  // Expected rows in iteration order; fields 1..3 of each row all carry the same count.
  String[] expectedItems = { "a", "b" };
  long[] expectedCounts = { 2L, 1L };

  Iterator<Tuple> rows = bag.iterator();
  for (int i = 0; i < expectedItems.length; i++) {
    Tuple row = rows.next();
    Assert.assertEquals(row.size(), 4);
    Assert.assertEquals((String) row.get(0), expectedItems[i]);
    for (int field = 1; field <= 3; field++) {
      Assert.assertEquals((long) row.get(field), expectedCounts[i]);
    }
  }
}
 
开发者ID:DataSketches,项目名称:sketches-pig,代码行数:28,代码来源:FrequentStringsSketchToEstimatesTest.java

示例8: exec

import org.apache.pig.data.DataBag; //导入方法依赖的package包/类
/**
 * Extracts organisation names from a tokenised text and its parallel named-entity tags.
 *
 * <p>Field 0 of {@code input} is a tuple holding two bags: the tokens (field 0) and the tags
 * (field 1), each tuple of a bag carrying one {@code String} in its field 0. Consecutive
 * tokens tagged {@code "I-ORG"} or {@code "ORG"} are joined with single spaces; the pending
 * name is emitted when an {@code "O"} tag or the last tag is reached.
 *
 * @param input tuple wrapping the (tokens, tags) tuple
 * @return a bag with one single-field tuple per composed organisation name (possibly empty),
 *         or {@code null} when {@code input} is null or has no fields
 * @throws IOException if a field cannot be read from the input
 */
@Override
public DataBag exec(Tuple input) throws IOException {
	if (input == null || input.size() == 0) {
		return null;
	}

	DataBag namedEntities = bagFactory.newDefaultBag();

	Tuple t1 = (Tuple) input.get(0);
	DataBag tokensBag = (DataBag) t1.get(0);
	DataBag neBag = (DataBag) t1.get(1);

	String[] tokens = new String[(int) tokensBag.size()];
	String[] ne = new String[(int) neBag.size()];

	int ti = 0;
	for (Tuple token : tokensBag) {
		tokens[ti++] = (String) token.get(0);
	}

	int ni = 0;
	for (Tuple neTag : neBag) {
		ne[ni++] = (String) neTag.get(0);
	}

	StringBuilder composedToken = new StringBuilder();

	for (int i = 0; i < ne.length; i++) {
		String tag = ne[i];

		// Constant-first equals keeps a null tag from throwing; it simply matches nothing.
		if ("I-ORG".equals(tag) || "ORG".equals(tag)) {
			composedToken.append(tokens[i]).append(' ');
		}

		if ("O".equals(tag) || i == ne.length - 1) {
			// Test for content by length; the previous `!= ""` compared String references
			// and only worked because of how literals are interned.
			if (composedToken.length() > 0) {
				namedEntities.add(tupleFactory.newTuple(composedToken.toString().trim()));
			}

			composedToken.setLength(0);
		}
	}

	return namedEntities;
}
 
开发者ID:news-sentiment,项目名称:news-sentiment-pig,代码行数:55,代码来源:NamedEntity.java


注:本文中的org.apache.pig.data.DataBag.iterator方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。