当前位置: 首页>>代码示例>>Java>>正文


Java DefaultBagFactory类代码示例

本文整理汇总了 Java 中 org.apache.pig.data.DefaultBagFactory 的典型用法代码示例。如果您正苦于以下问题:Java DefaultBagFactory 类的具体用法是什么?Java DefaultBagFactory 怎么用?有哪些 Java DefaultBagFactory 的使用例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


DefaultBagFactory类属于org.apache.pig.data包,在下文中一共展示了DefaultBagFactory类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testUniformDistribution

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Checks that {@code Histogram.exec} returns one entry per requested bin for bags filled
 * with values drawn from {@code rnd.nextInt(100)}.
 *
 * <p>Each of the 100 rounds builds a fresh bag of 100 single-field tuples. With one bin the
 * only entry must equal {@code 100L}; with 2 and 10 bins only the result size is checked.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testUniformDistribution() throws IOException {
  Random rnd = getRandomInstance();
  TupleFactory tupleFactory = TupleFactory.getInstance();
  for (int round = 0; round < 100; round++) {
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    for (int j = 0; j < 100; j++) {
      // Box explicitly so the value is passed as an Object datum; a bare int would select
      // the size-taking newTuple(int) overload instead. Integer.valueOf replaces the
      // deprecated Integer constructor.
      bag.add(tupleFactory.newTuple(Integer.valueOf(rnd.nextInt(100))));
    }

    // One bin: every sample must be counted in it.
    Tuple result = Histogram.exec(bag, 0, 99, 1);
    Assert.assertEquals(result.size(), 1);
    Assert.assertEquals(result.get(0), 100L);

    // Several bins: only the number of bins is verified.
    result = Histogram.exec(bag, 0, 99, 2);
    Assert.assertEquals(result.size(), 2);

    result = Histogram.exec(bag, 0, 99, 10);
    Assert.assertEquals(result.size(), 10);
  }
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:23,代码来源:TestHistogram.java

示例2: testNormalDistribution

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Checks that {@code Histogram.exec} returns one entry per requested bin for bags filled
 * with Gaussian samples clamped to the range [0, 100].
 *
 * <p>Each of the 1000 rounds builds a fresh bag of 100 single-field tuples. With one bin the
 * only entry must equal {@code 100L}; with 2 and 10 bins only the result size is checked.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testNormalDistribution() throws IOException {
  Random rnd = getRandomInstance();
  TupleFactory tupleFactory = TupleFactory.getInstance();
  for (int round = 0; round < 1000; round++) {
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();
    for (int j = 0; j < 100; j++) {
      // Gaussian sample with mean 50 and scale 50, truncated to int and clamped to [0, 100].
      int x = Math.max(Math.min((int) (50 + rnd.nextGaussian() * 50), 100), 0);
      // Box explicitly so the value is passed as an Object datum; a bare int would select
      // the size-taking newTuple(int) overload instead. Integer.valueOf replaces the
      // deprecated Integer constructor.
      bag.add(tupleFactory.newTuple(Integer.valueOf(x)));
    }

    // One bin: every sample must be counted in it.
    Tuple result = Histogram.exec(bag, 0, 100, 1);
    Assert.assertEquals(result.size(), 1);
    Assert.assertEquals(result.get(0), 100L);

    // Several bins: only the number of bins is verified.
    result = Histogram.exec(bag, 0, 100, 2);
    Assert.assertEquals(result.size(), 2);

    result = Histogram.exec(bag, 0, 100, 10);
    Assert.assertEquals(result.size(), 10);
  }
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:24,代码来源:TestHistogram.java

示例3: testSimple

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Runs {@code Extract} over a bag of two (string, int) tuples and checks that asking for
 * column 0 yields the strings and asking for column 1 yields the ints, in insertion order.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testSimple() throws IOException {
  Extract extractor = new Extract();
  TupleFactory tupleFactory = TupleFactory.getInstance();
  DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

  int[] counts = { 6, 22 };
  String[] terms = { "term1", "term2" };
  Tuple[] rows = new Tuple[2];
  int idx = 0;
  while (idx < rows.length) {
    Tuple row = tupleFactory.newTuple(Arrays.asList(terms[idx], counts[idx]));
    rows[idx] = row;
    bag.add(row);
    idx++;
  }

  // Column 0 of every bag tuple: the term strings.
  Tuple termColumn = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 0)));
  for (idx = 0; idx < rows.length; idx++) {
    assertEquals(termColumn.get(idx), terms[idx]);
  }

  // Column 1 of every bag tuple: the integer values.
  Tuple countColumn = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 1)));
  for (idx = 0; idx < rows.length; idx++) {
    assertEquals(countColumn.get(idx), counts[idx]);
  }
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:27,代码来源:ExtractTest.java

示例4: exec

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Produces a bag holding one single-field tuple per n-gram generated from the string in the
 * first field of {@code input}.
 *
 * <p>The string is split with {@code TutorialUtil.splitToWords} and the n-grams are collected
 * into a set by {@code TutorialUtil.makeNGram}, bounded by {@code _ngramSizeLimit}.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the bag of n-gram tuples, or {@code null} when {@code input} is null or empty, or
 *     when any exception occurs during processing (the message is written to stderr)
 * @throws IOException declared by the signature; failures inside the body are caught
 */
public DataBag exec(Tuple input) throws IOException {
    boolean nothingToProcess = (input == null) || (input.size() == 0);
    if (nothingToProcess) {
        return null;
    }

    try {
        DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
        String[] tokens = TutorialUtil.splitToWords((String) input.get(0));
        Set<String> grams = new HashSet<String>();
        TutorialUtil.makeNGram(tokens, grams, _ngramSizeLimit);
        for (String gram : grams) {
            Tuple wrapper = TupleFactory.getInstance().newTuple(1);
            wrapper.set(0, gram);
            result.add(wrapper);
        }
        return result;
    } catch (Exception e) {
        // Best effort: report the failure and signal "no result" instead of propagating.
        System.err.println("NGramGenerator: failed to process input; error - " + e.getMessage());
        return null;
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:21,代码来源:NGramGenerator.java

示例5: exec

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Tokenizes the string in the first field of {@code input} and returns a bag with one
 * single-field tuple per token.
 *
 * <p>Tokens are separated by any of: space, double quote, comma, parentheses, asterisk.
 * The delimiters themselves are not returned.
 *
 * @param input tuple whose first field is cast to {@code String}
 * @return the bag of token tuples, or {@code null} when {@code input} is null or empty, or
 *     when reading/casting the first field throws (the message is written to stderr)
 * @throws IOException declared by the signature
 */
public DataBag exec(Tuple input) throws IOException {
    if (input == null) {
        return null;
    }
    if (input.size() == 0) {
        return null;
    }

    final String text;
    try {
        text = (String) input.get(0);
    } catch (Exception e) {
        System.err.println ("Failed to process input; error - " + e.getMessage());
        return null;
    }

    DataBag tokens = DefaultBagFactory.getInstance().newDefaultBag();
    for (StringTokenizer splitter = new StringTokenizer(text, " \",()*", false);
            splitter.hasMoreTokens(); ) {
        String token = splitter.nextToken();
        tokens.add(DefaultTupleFactory.getInstance().newTuple(token));
    }
    return tokens;
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:24,代码来源:TestEvalFunc.java

示例6: testSerialize__bag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple whose field is a bag of two tuples, ("A", "B") and (1, 2),
 * through {@code ps.serializeToBytes} and compares the bytes with the expected delimited
 * encoding using this class's {@code assertEquals} helper.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testSerialize__bag() throws IOException {
    Tuple outer = tf.newTuple(1);
    Tuple letters = tf.newTuple(2);
    Tuple numbers = tf.newTuple(2);

    letters.set(0, "A");
    letters.set(1, "B");
    numbers.set(0, 1);
    numbers.set(1, 2);

    List<Tuple> contents = new ArrayList<Tuple>();
    contents.add(letters);
    contents.add(numbers);

    outer.set(0, DefaultBagFactory.getInstance().newDefaultBag(contents));

    byte[] expectedOutput = "|{_|(_CA|,_CB|)_|,_|(_I1|,_I2|)_|}_|_\n".getBytes();
    byte[] actualOutput = ps.serializeToBytes(outer);
    Assert.assertTrue(assertEquals(expectedOutput, actualOutput));
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:18,代码来源:TestPigStreamingUDF.java

示例7: testSerialize__bag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Serializes a one-field tuple whose field is a bag of two tuples, ("A", "B") and (1, 2),
 * through {@code ps.serialize} and expects the bytes of <code>{(A,B),(1,2)}</code> followed
 * by a newline.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testSerialize__bag() throws IOException {
    Tuple wrapper = tf.newTuple(1);
    Tuple stringPair = tf.newTuple(2);
    Tuple intPair = tf.newTuple(2);

    List<Tuple> members = new ArrayList<Tuple>();
    members.add(stringPair);
    members.add(intPair);

    // The list holds references, so the tuples can be populated after being added.
    stringPair.set(0, "A");
    stringPair.set(1, "B");
    intPair.set(0, 1);
    intPair.set(1, 2);

    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag(members);
    wrapper.set(0, bag);

    byte[] expected = "{(A,B),(1,2)}\n".getBytes();
    Assert.assertArrayEquals(expected, ps.serialize(wrapper));
}
 
开发者ID:sigmoidanalytics,项目名称:spork,代码行数:19,代码来源:TestPigStreaming.java

示例8: numberArrayToDataBag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps each given number in its own single-field tuple and collects the tuples in a new
 * default bag, in argument order.
 *
 * @param values the numbers to wrap
 * @return a bag containing one tuple per element of {@code values}
 */
private static DataBag numberArrayToDataBag(Number... values) {
  DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
  for (int idx = 0; idx < values.length; idx++) {
    Number current = values[idx];
    Tuple wrapped = TupleFactory.getInstance().newTuple(current);
    result.add(wrapped);
  }
  return result;
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:8,代码来源:TestHistogram.java

示例9: testLongBag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Runs {@code Extract} over a bag of eleven (string, int) tuples and checks that asking for
 * column 0 yields the strings and asking for column 1 yields the ints, in insertion order.
 *
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testLongBag() throws IOException {
  Extract extractor = new Extract();
  TupleFactory tupleFactory = TupleFactory.getInstance();
  DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

  int[] numbers = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
  String[] labels = {
      "term0", "term1", "term2", "term3", "term4", "term5",
      "term6", "term7", "term8", "term9", "term10"
  };
  Tuple[] rows = new Tuple[11];
  int pos = 0;
  while (pos < rows.length) {
    Tuple row = tupleFactory.newTuple(Arrays.asList(labels[pos], numbers[pos]));
    rows[pos] = row;
    bag.add(row);
    pos++;
  }

  // Column 0 of every bag tuple: the label strings.
  Tuple labelColumn = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 0)));
  for (pos = 0; pos < rows.length; pos++) {
    assertEquals(labelColumn.get(pos), labels[pos]);
  }

  // Column 1 of every bag tuple: the integer values.
  Tuple numberColumn = extractor.exec(tupleFactory.newTuple(Arrays.asList(bag, 1)));
  for (pos = 0; pos < rows.length; pos++) {
    assertEquals(numberColumn.get(pos), numbers[pos]);
  }
}
 
开发者ID:lucidworks,项目名称:pig-solr,代码行数:28,代码来源:ExtractTest.java

示例10: consumeBag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Reads a bag from the stream: skips forward to the opening '{', then repeatedly parses a
 * tuple with {@code consumeTuple} and skips to the next ',' or '}' until the closing '}' is
 * consumed.
 *
 * @param in stream positioned at or before the bag's opening '{'
 * @param fieldSchema schema of the bag field; its nested schema must contain exactly one field
 * @return a new default bag containing every non-null tuple returned by {@code consumeTuple}
 * @throws IOException if {@code fieldSchema} is null, if the stream ends before the bag is
 *     opened or closed, or if the nested schema does not have exactly one field
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema==null) {
        throw new IOException("Schema is null");
    }
    // NOTE(review): getSchema() is dereferenced without a null check — confirm it cannot
    // return null for a bag field.
    ResourceFieldSchema[] fss=fieldSchema.getSchema().getFields();
    Tuple t;
    int buf;
    // Discard everything up to and including the opening brace; -1 means end of stream.
    while ((buf=in.read())!='{') {
        if (buf==-1) {
            throw new IOException("Unexpect end of bag");
        }
    }
    // The schema-arity check runs only after the opening brace has already been consumed.
    if (fss.length!=1)
        throw new IOException("Only tuple is allowed inside bag schema");
    ResourceFieldSchema fs = fss[0];
    DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
    while (true) {
        // Parse one element; a null result is skipped rather than added to the bag.
        t = consumeTuple(in, fs);
        if (t!=null)
            db.add(t);
        // Skip to the next separator (',') or the end of the bag ('}').
        while ((buf=in.read())!='}'&&buf!=',') {
            if (buf==-1) {
                throw new IOException("Unexpect end of bag");
            }
        }
        if (buf=='}')
            break;
    }
    return db;
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:31,代码来源:Utf8StorageConverter.java

示例11: exec

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Wraps the input in a new bag.
 *
 * <p>A null or empty input yields an empty bag. When {@code fieldType} is
 * {@code DataType.MAP}, the bag holds a single one-field tuple containing the result of
 * {@code createMap(input)}; otherwise the bag holds {@code input} itself.
 *
 * @param input the tuple to place in the bag; may be null
 * @return the newly created bag, never null
 * @throws IOException declared by the signature
 * @throws RuntimeException if any exception occurs while building the bag; the original
 *     exception is attached as the cause
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    try {
        DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

        if (input == null || input.size() == 0) {
            return bag; // an empty bag
        }

        if (this.fieldType == DataType.MAP) {
            Tuple t = DefaultTupleFactory.getInstance().newTuple(1);
            t.set(0, createMap(input));
            bag.add(t);
        } else {
            bag.add(input);
        }

        return bag;
    } catch (Exception e) {
        // Chain the cause so the underlying failure and its stack trace are not lost.
        // The message text is kept as-is for compatibility with existing callers and logs.
        throw new RuntimeException(
                "Error while computing size in " + this.getClass().getSimpleName(), e);
    }
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:28,代码来源:TOBAG.java

示例12: testGetNextTuple

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Drains {@code sp} with {@code getNextTuple()} until it reports
 * {@code POStatus.STATUS_EOP}, converts each result with {@code castToDBA}, and checks that
 * the collected bag matches {@code expBag} according to {@code TestHelper.compareBags}.
 *
 * @throws ExecException declared for the operator calls made by this test
 * @throws IOException declared for the calls made by this test
 */
@Test
public void testGetNextTuple() throws ExecException, IOException {
    DataBag outBag = DefaultBagFactory.getInstance().newDefaultBag();
    Result res = sp.getNextTuple();
    while (res.returnStatus != POStatus.STATUS_EOP) {
        outBag.add(castToDBA((Tuple) res.result));
        res = sp.getNextTuple();
    }
    assertTrue(TestHelper.compareBags(expBag, outBag));
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:10,代码来源:TestUnion.java

示例13: genFloatDataBag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Builds a bag of {@code row} tuples, each with {@code column} float fields set to
 * {@code r.nextFloat() * 1000}.
 *
 * @param r source of random floats
 * @param column number of fields appended to each tuple
 * @param row number of tuples added to the bag
 * @return the populated bag
 */
public static DataBag genFloatDataBag(Random r, int column, int row) {
    DataBag result = DefaultBagFactory.getInstance().newDefaultBag();
    int rowIdx = 0;
    while (rowIdx < row) {
        Tuple current = TupleFactory.getInstance().newTuple();
        int colIdx = 0;
        while (colIdx < column) {
            float scaled = r.nextFloat() * 1000;
            current.append(scaled);
            colIdx++;
        }
        result.add(current);
        rowIdx++;
    }
    return result;
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:12,代码来源:GenRandomData.java

示例14: projectBag

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Projects a single column out of a bag: for every tuple in {@code db2}, field {@code i} is
 * copied into a new one-field tuple, and those tuples are collected in a new bag.
 *
 * @param db2 the bag to project
 * @param i index of the field to extract from each tuple
 * @return a new bag of single-field tuples
 * @throws ExecException if reading field {@code i} from a tuple fails
 */
public static DataBag projectBag(DataBag db2, int i) throws ExecException {
    DataBag projected = DefaultBagFactory.getInstance().newDefaultBag();
    for (Tuple source : db2) {
        Object field = source.get(i);
        Tuple single = new DefaultTuple();
        single.append(field);
        projected.add(single);
    }
    return projected;
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:11,代码来源:TestHelper.java

示例15: areFilesSame

import org.apache.pig.data.DefaultBagFactory; //导入依赖的package包/类
/**
 * Loads two files through {@code POLoad} operators and reports whether they hold the same
 * tuples.
 *
 * <p>Every tuple from each file is passed through {@code trimTuple} and collected into a
 * bag. The files are considered different if their tuple counts differ; otherwise the
 * result of {@code compareBags} on the two bags is returned.
 *
 * @param expLocal file spec of the expected data
 * @param actHadoop file spec of the actual data
 * @param pc Pig context set on both load operators
 * @return {@code true} if the tuple counts match and {@code compareBags} accepts the bags
 * @throws ExecException declared for the load operator calls
 * @throws IOException declared for the calls made by this method
 */
public static boolean areFilesSame(FileSpec expLocal, FileSpec actHadoop, PigContext pc) throws ExecException, IOException{
    // Random operator ids; the expected-side loader is created first, as before.
    Random r = new Random();

    POLoad ldExp = new POLoad(new OperatorKey("", r.nextLong()));
    ldExp.setPc(pc);
    ldExp.setLFile(expLocal);

    POLoad ldAct = new POLoad(new OperatorKey("", r.nextLong()));
    ldAct.setPc(pc);
    ldAct.setLFile(actHadoop);

    // Drain the actual file first, then the expected one. Both are read in full before the
    // counts are compared.
    int numActTuples = 0;
    DataBag bagAct = DefaultBagFactory.getInstance().newDefaultBag();
    Result resAct;
    while ((resAct = ldAct.getNextTuple()).returnStatus != POStatus.STATUS_EOP) {
        ++numActTuples;
        bagAct.add(trimTuple((Tuple) resAct.result));
    }

    int numExpTuples = 0;
    DataBag bagExp = DefaultBagFactory.getInstance().newDefaultBag();
    Result resExp;
    while ((resExp = ldExp.getNextTuple()).returnStatus != POStatus.STATUS_EOP) {
        ++numExpTuples;
        bagExp.add(trimTuple((Tuple) resExp.result));
    }

    if (numActTuples != numExpTuples) {
        return false;
    }

    return compareBags(bagExp, bagAct);
}
 
开发者ID:sigmoidanalytics,项目名称:spork-streaming,代码行数:34,代码来源:TestHelper.java


注:本文中的org.apache.pig.data.DefaultBagFactory类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。