本文整理汇总了Java中org.apache.pig.data.DefaultBagFactory类的典型用法代码示例。如果您正苦于以下问题：Java DefaultBagFactory类的具体用法？Java DefaultBagFactory怎么用？Java DefaultBagFactory使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
DefaultBagFactory类属于org.apache.pig.data包,在下文中一共展示了DefaultBagFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testUniformDistribution
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Feeds {@code Histogram.exec} bags of 100 integers drawn from
 * {@code rnd.nextInt(100)} and asserts that the result has as many entries as
 * the last argument passed (1, 2 and 10), and that with a single entry the
 * value is {@code 100L}, i.e. the number of tuples in the bag.
 *
 * @throws IOException propagated from {@code Histogram.exec} or tuple access
 */
@Test
public void testUniformDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 100; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            // The argument is kept boxed so the same newTuple overload is
            // selected as before (a primitive int could bind to a different one).
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(rnd.nextInt(100))));
        }

        // One entry: every tuple of the bag must be counted in it.
        Tuple result = Histogram.exec(bag, 0, 99, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 99, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 99, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
示例2: testNormalDistribution
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Feeds {@code Histogram.exec} bags of 100 integers computed as
 * {@code 50 + nextGaussian() * 50} and clamped to the range 0..100, then
 * asserts that the result has as many entries as the last argument passed
 * (1, 2 and 10), and that with a single entry the value is {@code 100L},
 * i.e. the number of tuples in the bag.
 *
 * @throws IOException propagated from {@code Histogram.exec} or tuple access
 */
@Test
public void testNormalDistribution() throws IOException {
    Random rnd = getRandomInstance();
    for (int i = 0; i < 1000; i++) {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        for (int j = 0; j < 100; j++) {
            // Clamp the sample into [0, 100].
            int x = Math.max(Math.min((int) (50 + rnd.nextGaussian() * 50), 100), 0);
            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            // The argument is kept boxed so the same newTuple overload is
            // selected as before (a primitive int could bind to a different one).
            bag.add(TupleFactory.getInstance().newTuple(Integer.valueOf(x)));
        }

        // One entry: every tuple of the bag must be counted in it.
        Tuple result = Histogram.exec(bag, 0, 100, 1);
        Assert.assertEquals(result.size(), 1);
        Assert.assertEquals(result.get(0), 100L);

        result = Histogram.exec(bag, 0, 100, 2);
        Assert.assertEquals(result.size(), 2);

        result = Histogram.exec(bag, 0, 100, 10);
        Assert.assertEquals(result.size(), 10);
    }
}
示例3: testSimple
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Puts two (string, int) tuples into a bag and runs {@code Extract} on
 * (bag, 0) and (bag, 1), asserting that the results hold the strings and the
 * ints respectively, in insertion order.
 *
 * @throws IOException propagated from {@code Extract.exec} or tuple access
 */
@Test
public void testSimple() throws IOException {
    Extract extractor = new Extract();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

    String[] strings = { "term1", "term2" };
    int[] values = { 6, 22 };
    final int count = 2;

    for (int idx = 0; idx < count; idx++) {
        bag.add(tupleFactory.newTuple(Arrays.asList(strings[idx], values[idx])));
    }

    // Second tuple field 0: expect the strings.
    Tuple byFirstField = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 0)));
    for (int idx = 0; idx < count; idx++) {
        assertEquals(byFirstField.get(idx), strings[idx]);
    }

    // Second tuple field 1: expect the ints.
    Tuple bySecondField = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 1)));
    for (int idx = 0; idx < count; idx++) {
        assertEquals(bySecondField.get(idx), values[idx]);
    }
}
示例4: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Splits the first field of {@code input} into words via
 * {@code TutorialUtil.splitToWords}, builds n-grams from them with
 * {@code TutorialUtil.makeNGram} (limited by {@code _ngramSizeLimit}), and
 * returns a bag holding one single-field tuple per distinct n-gram.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the bag of n-gram tuples, or {@code null} when {@code input} is
 *         null or empty, or when any exception occurs during processing
 *         (the error message is printed to {@code System.err})
 * @throws IOException declared by the signature; exceptions raised in the
 *         body are caught here
 */
public DataBag exec(Tuple input) throws IOException {
    boolean nothingToProcess = (input == null) || (input.size() == 0);
    if (nothingToProcess) {
        return null;
    }

    try {
        DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
        String[] tokens = TutorialUtil.splitToWords((String) input.get(0));

        // A set, so each n-gram appears in the output only once.
        Set<String> grams = new HashSet<String>();
        TutorialUtil.makeNGram(tokens, grams, _ngramSizeLimit);

        for (String gram : grams) {
            Tuple single = TupleFactory.getInstance().newTuple(1);
            single.set(0, gram);
            result.add(single);
        }
        return result;
    } catch (Exception e) {
        // Best effort: report and yield null rather than fail.
        System.err.println("NGramGenerator: failed to process input; error - " + e.getMessage());
        return null;
    }
}
示例5: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Tokenizes the first field of {@code input} on the delimiter characters
 * space, double quote, comma, parentheses and asterisk, and returns a bag
 * with one tuple per token.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the bag of token tuples, or {@code null} when {@code input} is
 *         null or empty, when reading/casting the first field fails (the
 *         error message is printed to {@code System.err}), or when the
 *         first field is null
 * @throws IOException declared by the signature
 */
public DataBag exec(Tuple input) throws IOException
{
    if (input == null || input.size() == 0)
    {
        return null;
    }

    String str;
    try
    {
        str = (String) input.get(0);
    }
    catch (Exception e)
    {
        System.err.println ("Failed to process input; error - " + e.getMessage());
        return null;
    }

    if (str == null)
    {
        // StringTokenizer's constructor throws NullPointerException on a null
        // string; treat a null field like the other unusable-input cases.
        return null;
    }

    DataBag output = DefaultBagFactory.getInstance().newDefaultBag();
    // returnDelims = false: delimiters only separate tokens and are not emitted.
    StringTokenizer tok = new StringTokenizer(str, " \",()*", false);
    while (tok.hasMoreTokens())
    {
        output.add(DefaultTupleFactory.getInstance().newTuple(tok.nextToken()));
    }
    return output;
}
示例6: testSerialize__bag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple whose field is a bag of ("A", "B") and (1, 2)
 * with {@code ps.serializeToBytes} and checks the bytes against the expected
 * delimited representation.
 *
 * @throws IOException propagated from tuple mutation or serialization
 */
@Test
public void testSerialize__bag() throws IOException {
    Tuple letters = tf.newTuple(2);
    letters.set(0, "A");
    letters.set(1, "B");

    Tuple numbers = tf.newTuple(2);
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    Tuple wrapper = tf.newTuple(1);
    wrapper.set(0, DefaultBagFactory.getInstance().newDefaultBag(contents));

    byte[] expected = "|{_|(_CA|,_CB|)_|,_|(_I1|,_I2|)_|}_|_\n".getBytes();
    byte[] actual = ps.serializeToBytes(wrapper);
    // assertEquals here is the boolean-returning byte[] comparison visible to this test.
    Assert.assertTrue(assertEquals(expected, actual));
}
示例7: testSerialize__bag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple whose field is a bag of ("A", "B") and (1, 2)
 * with {@code ps.serialize} and checks the bytes against
 * {@code "{(A,B),(1,2)}\n"}.
 *
 * @throws IOException propagated from tuple mutation or serialization
 */
@Test
public void testSerialize__bag() throws IOException {
    Tuple letters = tf.newTuple(2);
    letters.set(0, "A");
    letters.set(1, "B");

    Tuple numbers = tf.newTuple(2);
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    Tuple wrapper = tf.newTuple(1);
    wrapper.set(0, DefaultBagFactory.getInstance().newDefaultBag(contents));

    byte[] expected = "{(A,B),(1,2)}\n".getBytes();
    byte[] actual = ps.serialize(wrapper);
    Assert.assertArrayEquals(expected, actual);
}
示例8: numberArrayToDataBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps each given number in its own tuple and collects the tuples in a new
 * default bag, in argument order.
 *
 * @param values numbers to wrap
 * @return a bag with one tuple per element of {@code values}
 */
private static DataBag numberArrayToDataBag(Number... values) {
    DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
    for (int idx = 0; idx < values.length; idx++) {
        result.add(TupleFactory.getInstance().newTuple(values[idx]));
    }
    return result;
}
示例9: testLongBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Puts eleven (string, int) tuples into a bag and runs {@code Extract} on
 * (bag, 0) and (bag, 1), asserting that the results hold the strings and the
 * ints respectively, in insertion order.
 *
 * @throws IOException propagated from {@code Extract.exec} or tuple access
 */
@Test
public void testLongBag() throws IOException {
    Extract extractor = new Extract();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

    String[] strings = {
        "term0", "term1", "term2", "term3", "term4", "term5",
        "term6", "term7", "term8", "term9", "term10"
    };
    int[] values = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    final int count = 11;

    for (int idx = 0; idx < count; idx++) {
        bag.add(tupleFactory.newTuple(Arrays.asList(strings[idx], values[idx])));
    }

    // Second tuple field 0: expect the strings.
    Tuple byFirstField = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 0)));
    for (int idx = 0; idx < count; idx++) {
        assertEquals(byFirstField.get(idx), strings[idx]);
    }

    // Second tuple field 1: expect the ints.
    Tuple bySecondField = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 1)));
    for (int idx = 0; idx < count; idx++) {
        assertEquals(bySecondField.get(idx), values[idx]);
    }
}
示例10: consumeBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Reads a bag from {@code in}: skips input up to the opening '{', then
 * repeatedly reads a tuple via {@code consumeTuple} and skips to the next
 * ',' or '}' until the closing '}' is reached.
 *
 * @param in          stream positioned before the bag's opening brace
 * @param fieldSchema schema of the bag field; its nested schema must contain
 *                    exactly one field, which is passed to {@code consumeTuple}
 * @return a bag of the non-null tuples returned by {@code consumeTuple}
 * @throws IOException if {@code fieldSchema} is null, the stream ends before
 *         the bag is complete, or the nested schema does not have exactly
 *         one field
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    ResourceFieldSchema[] innerFields = fieldSchema.getSchema().getFields();

    // Discard everything up to and including the opening brace.
    int ch = in.read();
    while (ch != '{') {
        if (ch == -1) {
            throw new IOException("Unexpect end of bag");
        }
        ch = in.read();
    }

    // Checked only after the opening brace has been consumed.
    if (innerFields.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    ResourceFieldSchema tupleSchema = innerFields[0];

    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    do {
        Tuple element = consumeTuple(in, tupleSchema);
        if (element != null) {
            bag.add(element);
        }

        // Skip ahead to the separator (',') or the end of the bag ('}').
        ch = in.read();
        while (ch != '}' && ch != ',') {
            if (ch == -1) {
                throw new IOException("Unexpect end of bag");
            }
            ch = in.read();
        }
    } while (ch != '}');

    return bag;
}
示例11: exec
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps {@code input} in a new bag. When {@code fieldType} is
 * {@code DataType.MAP}, the bag receives a single-field tuple holding the
 * result of {@code createMap(input)}; otherwise {@code input} itself is added.
 *
 * @param input the tuple to wrap
 * @return an empty bag when {@code input} is null or empty, otherwise a bag
 *         with exactly one tuple
 * @throws IOException declared by the signature
 * @throws RuntimeException wrapping any exception raised while building the
 *         bag; the original exception is attached as the cause
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
        if (input == null || input.size() == 0) {
            return bag; // an empty bag
        }
        if (this.fieldType == DataType.MAP) {
            Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
            t.set(0, createMap(input));
            bag.add(t);
        } else {
            bag.add(input);
        }
        return bag;
    } catch (Exception e) {
        // Chain the cause so the original failure and stack trace are not lost.
        throw new RuntimeException(
                "Error while computing size in " + this.getClass().getSimpleName(), e);
    }
}
示例12: testGetNextTuple
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
@Test
public void testGetNextTuple() throws ExecException, IOException {
Tuple t = null;
DataBag outBag = DefaultBagFactory.getInstance().newDefaultBag();
for(Result res=sp.getNextTuple();res.returnStatus!=POStatus.STATUS_EOP;res=sp.getNextTuple()){
outBag.add(castToDBA((Tuple)res.result));
}
assertTrue(TestHelper.compareBags(expBag, outBag));
}
示例13: genFloatDataBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of {@code row} tuples, each with {@code column} fields set to
 * {@code r.nextFloat() * 1000}.
 *
 * @param r      source of random floats
 * @param column number of fields appended to each tuple
 * @param row    number of tuples added to the bag
 * @return the populated bag
 */
public static DataBag genFloatDataBag(Random r, int column, int row) {
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    int rowsDone = 0;
    while (rowsDone < row) {
        Tuple current = TupleFactory.getInstance().newTuple();
        int colsDone = 0;
        while (colsDone < column) {
            current.append(r.nextFloat() * 1000);
            colsDone++;
        }
        bag.add(current);
        rowsDone++;
    }
    return bag;
}
示例14: projectBag
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Projects field {@code i} out of every tuple in {@code db2}: for each tuple,
 * a new single-field tuple holding that field is added to the returned bag.
 *
 * @param db2 bag to project from
 * @param i   index of the field to take from each tuple
 * @return a new bag with one single-field tuple per tuple of {@code db2}
 * @throws ExecException propagated from {@code Tuple.get}
 */
public static DataBag projectBag(DataBag db2, int i) throws ExecException {
    DataBag projected = DefaultBagFactory.getInstance().newDefaultBag();
    for (Tuple row : db2) {
        Object field = row.get(i);
        Tuple single = new DefaultTuple();
        single.append(field);
        projected.add(single);
    }
    return projected;
}
示例15: areFilesSame
import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Loads both files through {@code POLoad} operators, collects their tuples
 * (each passed through {@code trimTuple}) in bags, and reports whether they
 * hold the same number of tuples and {@code compareBags} considers the bags
 * equal.
 *
 * @param expLocal  file with the expected content
 * @param actHadoop file with the actual content
 * @param pc        Pig context handed to both load operators
 * @return {@code false} when the tuple counts differ, otherwise the result of
 *         {@code compareBags(bagExp, bagAct)}
 * @throws ExecException propagated from {@code getNextTuple}
 * @throws IOException   declared by the signature
 */
public static boolean areFilesSame(FileSpec expLocal, FileSpec actHadoop, PigContext pc) throws ExecException, IOException{
    Random r = new Random();

    POLoad ldExp = new POLoad(new OperatorKey("", r.nextLong()));
    ldExp.setPc(pc);
    ldExp.setLFile(expLocal);

    POLoad ldAct = new POLoad(new OperatorKey("", r.nextLong()));
    ldAct.setPc(pc);
    ldAct.setLFile(actHadoop);

    // The actual file is drained first, then the expected one (original order kept).
    int numActTuples = 0;
    DataBag bagAct = DefaultBagFactory.getInstance().newDefaultBag();
    for (Result resAct = ldAct.getNextTuple();
            resAct.returnStatus != POStatus.STATUS_EOP;
            resAct = ldAct.getNextTuple()) {
        ++numActTuples;
        bagAct.add(trimTuple((Tuple) resAct.result));
    }

    int numExpTuples = 0;
    DataBag bagExp = DefaultBagFactory.getInstance().newDefaultBag();
    for (Result resExp = ldExp.getNextTuple();
            resExp.returnStatus != POStatus.STATUS_EOP;
            resExp = ldExp.getNextTuple()) {
        ++numExpTuples;
        bagExp.add(trimTuple((Tuple) resExp.result));
    }

    // Cheap count check before the full bag comparison.
    if (numActTuples != numExpTuples) {
        return false;
    }
    return compareBags(bagExp, bagAct);
}