本文整理汇总了Java中org.apache.pig.data.DataBag类的典型用法代码示例。如果您正苦于以下问题:Java DataBag类的具体用法?Java DataBag怎么用?Java DataBag使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
DataBag类属于org.apache.pig.data包,在下文中一共展示了DataBag类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testRejectionOfNonSimpleType
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Passes a tuple -> bag -> tuple -> bag structure to the UDF named by {@code udfName}
 * and relies on the annotation to require an IllegalArgumentException.
 */
@SuppressWarnings("unchecked") //still triggers unchecked warning
@Test(expectedExceptions = IllegalArgumentException.class)
public void testRejectionOfNonSimpleType() throws IOException {
    TupleFactory tuples = TupleFactory.getInstance();
    BagFactory bags = BagFactory.getInstance();

    // Innermost element: a one-field tuple whose field is an empty bag.
    Tuple nested = tuples.newTuple(1);
    nested.set(0, bags.newDefaultBag());

    // Wrap it in a bag, and that bag in the one-field input tuple.
    DataBag wrapper = bags.newDefaultBag();
    wrapper.add(nested);
    Tuple input = tuples.newTuple(1);
    input.set(0, wrapper);

    // Instantiate the UDF through Pig with a single constructor argument.
    FuncSpec spec = new FuncSpec(udfName, new String[] { "128" });
    EvalFunc<Tuple> udf = (EvalFunc<Tuple>) PigContext.instantiateFuncFromSpec(spec);
    udf.exec(input);
}
示例2: exec
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Splits the text held in the first field of {@code input} into sentences and
 * returns them as a bag with one single-field tuple per sentence.
 *
 * @param input tuple whose first field is cast to String
 * @return the bag of sentences, or null when {@code input} is null or has no fields
 * @throws IOException as declared by the overridden method
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    String[] pieces = sentenceTagger.splitSentences((String) input.get(0));
    DataBag result = bagFactory.newDefaultBag();
    for (int i = 0; i < pieces.length; i++) {
        result.add(tupleFactory.newTuple(pieces[i]));
    }
    return result;
}
示例3: algebraicInitialOneParam
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Feeds a bag of three tuples, each built from two nulls, to
 * DataToDoubleSummarySketch.Initial and checks that the result is a one-field
 * tuple wrapping a bag of three entries.
 */
@Test
public void algebraicInitialOneParam() throws Exception {
    // Input: one-field tuple holding a bag of three all-null rows.
    DataBag nullRows = BagFactory.getInstance().newDefaultBag();
    for (int row = 0; row < 3; row++) {
        nullRows.add(PigUtil.objectsToTuple(null, null));
    }
    Tuple input = TupleFactory.getInstance().newTuple(1);
    input.set(0, nullRows);

    EvalFunc<Tuple> initial = new DataToDoubleSummarySketch.Initial(null);
    Tuple output = initial.exec(input);

    Assert.assertNotNull(output);
    Assert.assertEquals(output.size(), 1);
    DataBag outputBag = (DataBag) output.get(0);
    Assert.assertEquals(outputBag.size(), 3);
}
示例4: checkExactTopExec
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Intersects four sketches built by {@code createDbaFromQssRange(256, start, 256)}
 * for start = 0, 64, 128, 192 and expects the estimate of the result to be exactly 64.
 */
@Test
public void checkExactTopExec() throws IOException {
    EvalFunc<Tuple> intersect = new Intersect();
    EvalFunc<Double> estimator = new Estimate();
    TupleFactory tupleMaker = TupleFactory.getInstance();

    // The bag is attached to the input tuple first and populated afterwards.
    Tuple input = tupleMaker.newTuple(1);
    DataBag sketches = BagFactory.getInstance().newDefaultBag();
    input.set(0, sketches);

    // Four sketches whose second argument steps by 64.
    for (int start = 0; start < 4 * 64; start += 64) {
        Tuple holder = tupleMaker.newTuple(1);
        holder.set(0, createDbaFromQssRange(256, start, 256));
        sketches.add(holder);
    }

    Tuple intersection = intersect.exec(input);
    assertNotNull(intersection);
    assertEquals(intersection.size(), 1);

    Double estimate = estimator.exec(intersection);
    assertEquals(estimate, 64.0, 0.0);
}
示例5: checkAlgFinalInnerNotDBA
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Exercises Union.IntermediateFinal with three successive inputs; the annotation
 * requires an IllegalArgumentException to be thrown somewhere in the method.
 *
 * <p>The first two inputs (field unset, then an empty bag) are each asserted to
 * estimate 0.0. The third input carries a Double where the test name suggests a
 * DataByteArray is required. NOTE(review): presumably the third exec call is the
 * one that throws -- confirm against Union.IntermediateFinal.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkAlgFinalInnerNotDBA() throws IOException {
    EvalFunc<Tuple> interFuncFinal = new Union.IntermediateFinal("256");
    EvalFunc<Double> estFunc = new Estimate();

    // Step 1: field 0 of the input tuple has not been set.
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    Tuple resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);

    // Step 2: field 0 now holds an empty (not null) bag.
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    inputTuple.set(0, bag);
    resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);

    // Step 3: the bag holds a tuple whose field is a Double, not a DataByteArray.
    Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
    bag.add(innerTuple);
    // Double.valueOf replaces the deprecated Double(double) constructor.
    innerTuple.set(0, Double.valueOf(1.0));
    resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
}
示例6: checkAlgFinalInnerBagEmpty
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Runs Union.IntermediateFinal on three inputs and expects an estimate of 0.0
 * each time: field unset, an empty outer bag, and an outer bag whose only tuple
 * wraps an empty inner bag.
 */
@Test
public void checkAlgFinalInnerBagEmpty() throws IOException {
    EvalFunc<Tuple> unionFinal = new Union.IntermediateFinal("256");
    EvalFunc<Double> estimator = new Estimate();
    TupleFactory tupleMaker = TupleFactory.getInstance();
    BagFactory bagMaker = BagFactory.getInstance();

    // Case 1: field 0 of the input tuple was never set.
    Tuple input = tupleMaker.newTuple(1);
    Tuple output = unionFinal.exec(input);
    assertEquals(estimator.exec(output), 0.0, 0.0);

    // Case 2: field 0 is an empty bag.
    DataBag outer = bagMaker.newDefaultBag();
    input.set(0, outer);
    output = unionFinal.exec(input);
    assertEquals(estimator.exec(output), 0.0, 0.0);

    // Case 3: the outer bag contains one tuple that wraps an empty inner bag.
    Tuple wrapper = tupleMaker.newTuple(1);
    outer.add(wrapper);
    wrapper.set(0, bagMaker.newDefaultBag());
    output = unionFinal.exec(input);
    assertEquals(estimator.exec(output), 0.0, 0.0);
}
示例7: testUniformDistribution
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Runs 100 rounds. Each round fills a bag with 100 single-field tuples holding
 * random integers drawn from {@code rnd.nextInt(100)} and calls
 * {@code Histogram.exec(bag, 0, 99, n)} for n = 1, 2 and 10.
 *
 * <p>Asserted: the result tuple has n fields, and for n = 1 the single field
 * equals 100L (the number of values added). NOTE(review): the last argument is
 * presumably the bin count and 0/99 the value range -- confirm against Histogram.
 */
@Test
public void testUniformDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 100; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Box explicitly so the value is stored as the tuple's field; a primitive int
            // would resolve to the newTuple(int) overload instead. Integer.valueOf
            // replaces the deprecated Integer(int) constructor.
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(rnd.nextInt(100))));
        }

        Tuple result = Histogram.exec(bag, 0, 99, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 99, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 99, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
示例8: testNormalDistribution
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Runs 1000 rounds. Each round fills a bag with 100 single-field tuples holding
 * {@code (int) (50 + gaussian * 50)} clamped to [0, 100], and calls
 * {@code Histogram.exec(bag, 0, 100, n)} for n = 1, 2 and 10.
 *
 * <p>Asserted: the result tuple has n fields, and for n = 1 the single field
 * equals 100L (the number of values added). NOTE(review): the last argument is
 * presumably the bin count and 0/100 the value range -- confirm against Histogram.
 */
@Test
public void testNormalDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 1000; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Clamp the sample into [0, 100] before storing it.
            int x = Math.max(Math.min((int) (50 + rnd.nextGaussian() * 50), 100), 0);
            // Box explicitly so the value is stored as the tuple's field; a primitive int
            // would resolve to the newTuple(int) overload instead. Integer.valueOf
            // replaces the deprecated Integer(int) constructor.
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(x)));
        }

        Tuple result = Histogram.exec(bag, 0, 100, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 100, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 100, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
示例9: algebraicIntermediateFromInitial
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Reflectively instantiates the class named by {@code UnionSketch.getIntermed()}
 * with ("10", "HLL_6"), feeds it one serialized HllSketch that was updated with
 * 1, 2 and 3, and checks the resulting sketch: non-empty, estimate 3.0 within
 * 0.01, lgConfigK 10 and target type HLL_6.
 */
@Test
public void algebraicIntermediateFromInitial() throws Exception {
    String intermedClassName = new UnionSketch().getIntermed();
    @SuppressWarnings("unchecked")
    EvalFunc<Tuple> intermediate = (EvalFunc<Tuple>) Class.forName(intermedClassName)
        .getConstructor(String.class, String.class)
        .newInstance("10", "HLL_6");

    // Source sketch holding three distinct values.
    HllSketch source = new HllSketch(12);
    for (int value = 1; value <= 3; value++) {
        source.update(value);
    }

    // Nest the serialized sketch: tuple -> bag -> tuple -> bag -> tuple.
    DataBag inner = bagFactory.newDefaultBag();
    inner.add(tupleFactory.newTuple(new DataByteArray(source.toCompactByteArray())));
    DataBag outer = bagFactory.newDefaultBag();
    outer.add(tupleFactory.newTuple(inner));

    Tuple output = intermediate.exec(tupleFactory.newTuple(outer));
    HllSketch merged = DataToSketchTest.getSketch((DataByteArray) output.get(0));

    Assert.assertFalse(merged.isEmpty());
    Assert.assertEquals(merged.getEstimate(), 3.0, 0.01);
    Assert.assertEquals(merged.getLgConfigK(), 10);
    Assert.assertEquals(merged.getTgtHllType(), TgtHllType.HLL_6);
}
示例10: testSimple
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Builds a bag of (String, int) tuples and checks that Extract, given the bag
 * and a field index, returns a tuple of that field's values in insertion order:
 * index 0 yields the strings, index 1 yields the ints.
 */
@Test
public void testSimple() throws IOException {
    final String[] terms = { "term1", "term2" };
    final int[] counts = { 6, 22 };

    TupleFactory factory = TupleFactory.getInstance();
    DataBag pairs = DefaultBagFactory.getInstance().newDefaultBag();
    for (int k = 0; k < terms.length; k++) {
        pairs.add(factory.newTuple(Arrays.asList(terms[k], counts[k])));
    }

    Extract extract = new Extract();

    // Field 0: the string column.
    Tuple firstColumn = extract.exec(factory.newTuple(Arrays.asList(pairs, 0)));
    for (int k = 0; k < terms.length; k++) {
        assertEquals(firstColumn.get(k), terms[k]);
    }

    // Field 1: the int column.
    Tuple secondColumn = extract.exec(factory.newTuple(Arrays.asList(pairs, 1)));
    for (int k = 0; k < counts.length; k++) {
        assertEquals(secondColumn.get(k), counts[k]);
    }
}
示例11: accumulate
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Merges every sketch found in the bag at field 0 of {@code inputTuple} into
 * {@code union_}, creating the union with {@code maxK_} on first use.
 *
 * @param inputTuple tuple whose first field is a bag of tuples, each holding a
 *        DataByteArray in its first field; a null tuple, a tuple with no fields
 *        or a null first field is ignored
 * @throws IOException as declared by the overridden method
 */
@Override
public void accumulate(final Tuple inputTuple) throws IOException {
    if (inputTuple == null) {
        return;
    }
    if (inputTuple.size() < 1 || inputTuple.isNull(0)) {
        return;
    }

    // Cast before the lazy initialization, so a wrong field type leaves union_ untouched.
    final DataBag incoming = (DataBag) inputTuple.get(0);
    if (union_ == null) {
        union_ = VarOptItemsUnion.newInstance(maxK_);
    }

    for (final Tuple entry : incoming) {
        final DataByteArray serialized = (DataByteArray) entry.get(0);
        union_.update(Memory.wrap(serialized.get()), SERDE);
    }
}
示例12: checkNotDBAExcep
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Calls DataToSketch.accumulate twice with a bag holding one tuple: first with
 * the tuple's field unset, then (on a fresh DataToSketch) with the field set to
 * a Double. There are no assertions; the test passes if neither call throws.
 *
 * <p>NOTE(review): the name suggests an exception is expected for the Double
 * input, but the annotation declares none -- confirm the intended contract.
 */
@Test
public void checkNotDBAExcep() throws IOException {
    DataToSketch inter = new DataToSketch();

    // Input tuple whose only field is a bag containing one one-field tuple.
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    inputTuple.set(0, bag);
    Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
    bag.add(innerTuple);

    // First pass: the inner tuple's field has not been set.
    inter.accumulate(inputTuple);

    // Second pass: the inner field is a Double rather than a DataByteArray.
    // Double.valueOf replaces the deprecated Double(double) constructor.
    innerTuple.set(0, Double.valueOf(1.0));
    inter = new DataToSketch();
    inter.accumulate(inputTuple);
}
示例13: exec
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Runs DataToFrequentStringsSketch("8") over a bag of four tuples -- ("a"),
 * ("b", 5L), ("a", 2L), ("b") -- and checks the deserialized sketch: two active
 * items, estimate 3 for "a" and 6 for "b".
 */
@Test
public void exec() throws Exception {
    DataBag items = BagFactory.getInstance().newDefaultBag();
    items.add(PigUtil.objectsToTuple("a"));
    items.add(PigUtil.objectsToTuple("b", 5L));
    items.add(PigUtil.objectsToTuple("a", 2L));
    items.add(PigUtil.objectsToTuple("b"));

    EvalFunc<Tuple> toSketch = new DataToFrequentStringsSketch("8");
    Tuple output = toSketch.exec(PigUtil.objectsToTuple(items));

    Assert.assertNotNull(output);
    Assert.assertEquals(output.size(), 1);
    DataByteArray serialized = (DataByteArray) output.get(0);
    Assert.assertTrue(serialized.size() > 0);

    // Rebuild the sketch from its bytes to inspect the counts.
    ItemsSketch<String> sketch =
        ItemsSketch.getInstance(Memory.wrap(serialized.get()), new ArrayOfStringsSerDe());
    Assert.assertEquals(sketch.getNumActiveItems(), 2);
    Assert.assertEquals(sketch.getEstimate("a"), 3);
    Assert.assertEquals(sketch.getEstimate("b"), 6);
}
示例14: testUDF
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Checks NullTupleFieldsToNull.exec against a series of inputs: null, an empty
 * tuple, a tuple wrapping null, a tuple wrapping an empty tuple, a tuple
 * wrapping an all-null tuple and a tuple of two bags are all expected to yield
 * null; a tuple wrapping (null, null, "tup1") is expected to yield that inner tuple.
 */
@Test
public void testUDF() throws IOException {
    TupleFactory tuples = TupleFactory.getInstance();
    BagFactory bags = BagFactory.getInstance();
    NullTupleFieldsToNull udf = new NullTupleFieldsToNull();

    // Fixtures.
    DataBag noTuples = bags.newDefaultBag();
    DataBag oneTuple = bags.newDefaultBag(Arrays.asList(tuples.newTuple("tup1")));
    Tuple allNulls = tuples.newTuple(Lists.newArrayList(null, null, null));
    Tuple partlyFilled = tuples.newTuple(Lists.newArrayList(null, null, "tup1"));

    // Inputs expected to collapse to null.
    assertNull(udf.exec(null));
    assertNull(udf.exec(tuples.newTuple()));
    assertNull(udf.exec(tuples.newTuple((Tuple)null)));
    assertNull(udf.exec(tuples.newTuple(tuples.newTuple())));
    assertNull(udf.exec(tuples.newTuple(allNulls)));

    // A tuple with at least one non-null field is returned.
    assertEquals(partlyFilled, udf.exec(tuples.newTuple(partlyFilled)));

    // Two bags as fields also yield null.
    assertNull(udf.exec(tuples.newTuple(Lists.newArrayList(oneTuple, noTuples))));
}
示例15: checkAlgFinalInnerNotDBA
import org.apache.pig.data.DataBag; //导入依赖的package包/类
/**
 * Exercises Intersect.IntermediateFinal with three successive inputs; the
 * annotation requires an IllegalArgumentException to be thrown somewhere in the
 * method.
 *
 * <p>The first two inputs (field unset, then an empty bag) are each asserted to
 * estimate 0.0. The third input carries a Double where the test name suggests a
 * DataByteArray is required. NOTE(review): presumably the third exec call is the
 * one that throws -- confirm against Intersect.IntermediateFinal.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkAlgFinalInnerNotDBA() throws IOException {
    EvalFunc<Tuple> interFuncFinal = new Intersect.IntermediateFinal();
    EvalFunc<Double> estFunc = new Estimate();

    // Step 1: field 0 of the input tuple has not been set.
    Tuple inputTuple = TupleFactory.getInstance().newTuple(1);
    Tuple resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);

    // Step 2: field 0 now holds an empty (not null) bag.
    DataBag bag = BagFactory.getInstance().newDefaultBag();
    inputTuple.set(0, bag);
    resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);

    // Step 3: the bag holds a tuple whose field is a Double, not a DataByteArray.
    Tuple innerTuple = TupleFactory.getInstance().newTuple(1);
    bag.add(innerTuple);
    // Double.valueOf replaces the deprecated Double(double) constructor.
    innerTuple.set(0, Double.valueOf(1.0));
    resultTuple = interFuncFinal.exec(inputTuple);
    assertEquals(estFunc.exec(resultTuple), 0.0, 0.0);
}