本文整理汇总了Java中org.apache.pig.data.DataByteArray类的典型用法代码示例。如果您正苦于以下问题：Java DataByteArray类的具体用法？Java DataByteArray怎么用？Java DataByteArray使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
DataByteArray类属于org.apache.pig.data包，在下文中一共展示了DataByteArray类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: manyEntriesTwoValuesInputSketch
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Runs {@code ArrayOfDoublesSketchToVariances} on a two-value sketch fed with 10000 keys of
 * Gaussian data and checks the two returned values against 1.0 and 100.0.
 */
@Test
public void manyEntriesTwoValuesInputSketch() throws Exception {
  // 10000 keys saturate a sketch built with the default number of nominal entries (4K)
  final int keyCount = 10000;
  final Random gaussianSource = new Random(0);
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToVariances();
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();

  int key = 0;
  while (key < keyCount) {
    // first value has standard deviation 1, second has standard deviation 10
    final double narrow = gaussianSource.nextGaussian();
    final double wide = gaussianSource.nextGaussian() * 10.0;
    updatable.update(key, new double[] {narrow, wide});
    key++;
  }
  Assert.assertTrue(updatable.getRetainedEntries() >= 4096);

  final byte[] serialized = updatable.compact().toByteArray();
  final Tuple variances = udf.exec(PigUtil.objectsToTuple(new DataByteArray(serialized)));

  Assert.assertNotNull(variances);
  Assert.assertEquals(variances.size(), 2);
  final double narrowVariance = (double) variances.get(0);
  final double wideVariance = (double) variances.get(1);
  Assert.assertEquals(narrowVariance, 1.0, 0.04);
  // variance is the squared standard deviation (10^2), checked to within 4%
  Assert.assertEquals(wideVariance, 100.0, 100.0 * 0.04);
}
示例2: updateUnion
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Updates the union with the sketch stored in field 0 of every tuple in the bag.
 * Tuples whose field 0 is null are skipped.
 *
 * @param bag bag of tuples; only field 0 of each tuple is considered
 * @param union the union to update
 * @throws ExecException as declared; propagated from reading a tuple's field or field type
 * @throws IllegalArgumentException if a non-null field 0 is not of type DataType.BYTEARRAY
 */
static void updateUnion(final DataBag bag, final Union union) throws ExecException {
  for (final Tuple sketchTuple : bag) {
    final Object field = sketchTuple.get(0);
    if (field != null) {
      final byte fieldType = sketchTuple.getType(0);
      if (fieldType != DataType.BYTEARRAY) {
        throw new IllegalArgumentException("Field type was not DataType.BYTEARRAY: " + fieldType);
      }
      final byte[] sketchBytes = ((DataByteArray) field).get();
      union.update(HllSketch.wrap(Memory.wrap(sketchBytes)));
    }
  }
}
示例3: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Deserializes the sketch of strings held in field 0 and lists its frequent items.
 *
 * @param input tuple whose field 0 is cast to a DataByteArray containing the serialized sketch
 * @return a bag holding one 4-field tuple (item, estimate, lower bound, upper bound) for each
 *     row returned by {@code getFrequentItems(errorType)}, or null if the input is null or empty
 * @throws IOException as declared by the overridden method
 */
@Override
public DataBag exec(final Tuple input) throws IOException {
  final boolean nothingToDo = (input == null) || (input.size() == 0);
  if (nothingToDo) {
    return null;
  }

  final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
  final ItemsSketch<String> sketch =
      ItemsSketch.getInstance(Memory.wrap(sketchBytes), new ArrayOfStringsSerDe());
  final ItemsSketch.Row<String>[] rows = sketch.getFrequentItems(errorType);

  final DataBag frequentItems = BagFactory.getInstance().newDefaultBag();
  for (final ItemsSketch.Row<String> row : rows) {
    final Tuple rowTuple = TupleFactory.getInstance().newTuple(4);
    rowTuple.set(0, row.getItem());
    rowTuple.set(1, row.getEstimate());
    rowTuple.set(2, row.getLowerBound());
    rowTuple.set(3, row.getUpperBound());
    frequentItems.add(rowTuple);
  }
  return frequentItems;
}
示例4: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Wraps the sketch held in field 0 and evaluates {@code getPMF} at the split points given in
 * the remaining fields.
 *
 * @param input tuple with a DataByteArray sketch in field 0 followed by one or more Double
 *     split points
 * @return the result of {@code sketch.getPMF(splitPoints)} converted to a tuple
 * @throws IllegalArgumentException if there are fewer than two fields, if field 0 is not a
 *     DataByteArray, or if any later field is not a Double (null fields included)
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if (input.size() < 2) {
    throw new IllegalArgumentException(
        "expected two or more inputs: sketch and list of split points");
  }

  // Each field is read once. A null field is reported as "null" instead of triggering a
  // NullPointerException from getClass() while the error message is being built.
  final Object sketchField = input.get(0);
  if (!(sketchField instanceof DataByteArray)) {
    throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
        + (sketchField == null ? "null" : sketchField.getClass().getSimpleName()));
  }
  final DataByteArray dba = (DataByteArray) sketchField;
  final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));

  final double[] splitPoints = new double[input.size() - 1];
  for (int i = 1; i < input.size(); i++) {
    final Object splitPoint = input.get(i);
    if (!(splitPoint instanceof Double)) {
      throw new IllegalArgumentException("expected a double value as a split point, got "
          + (splitPoint == null ? "null" : splitPoint.getClass().getSimpleName()));
    }
    splitPoints[i - 1] = (Double) splitPoint;
  }
  return Util.doubleArrayToTuple(sketch.getPMF(splitPoints));
}
示例5: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Wraps the sketch held in field 0 and returns its quantile for the fraction in field 1.
 *
 * @param input tuple with exactly two fields: a DataByteArray sketch and a Double fraction
 * @return the result of {@code sketch.getQuantile(fraction)}
 * @throws IllegalArgumentException if the tuple does not have exactly two fields, if field 0
 *     is not a DataByteArray, or if field 1 is not a Double (null fields included)
 * @throws IOException as declared by the overridden method
 */
@Override
public Double exec(final Tuple input) throws IOException {
  if (input.size() != 2) {
    throw new IllegalArgumentException("expected two inputs: sketch and fraction");
  }

  // Each field is read once. A null field is reported as "null" instead of triggering a
  // NullPointerException from getClass() while the error message is being built.
  final Object sketchField = input.get(0);
  if (!(sketchField instanceof DataByteArray)) {
    throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
        + (sketchField == null ? "null" : sketchField.getClass().getSimpleName()));
  }
  final DataByteArray dba = (DataByteArray) sketchField;
  final DoublesSketch sketch = DoublesSketch.wrap(Memory.wrap(dba.get()));

  final Object fractionField = input.get(1);
  if (!(fractionField instanceof Double)) {
    throw new IllegalArgumentException("expected a double value as a fraction, got "
        + (fractionField == null ? "null" : fractionField.getClass().getSimpleName()));
  }
  final double fraction = (Double) fractionField;
  return sketch.getQuantile(fraction);
}
示例6: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Deserializes the sketch of strings held in field 0 and returns its K parameter.
 *
 * @param input tuple with exactly one field: a DataByteArray containing the serialized sketch
 * @return the result of {@code sketch.getK()}
 * @throws IllegalArgumentException if the tuple does not have exactly one field, or if that
 *     field is not a DataByteArray (null included)
 * @throws IOException as declared by the overridden method
 */
@Override
public Integer exec(final Tuple input) throws IOException {
  if (input.size() != 1) {
    throw new IllegalArgumentException("expected one input");
  }

  // The field is read once. A null field is reported as "null" instead of triggering a
  // NullPointerException from getClass() while the error message is being built.
  final Object sketchField = input.get(0);
  if (!(sketchField instanceof DataByteArray)) {
    throw new IllegalArgumentException("expected a DataByteArray as a sketch, got "
        + (sketchField == null ? "null" : sketchField.getClass().getSimpleName()));
  }
  final DataByteArray dba = (DataByteArray) sketchField;
  final ItemsSketch<String> sketch =
      ItemsSketch.getInstance(Memory.wrap(dba.get()), Comparator.naturalOrder(),
          new ArrayOfStringsSerDe());
  return sketch.getK();
}
示例7: updateUnion
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Updates a union with every non-empty serialized sketch found in field 0 of the bag's tuples.
 * Tuples whose field 0 is null, or whose byte array is empty, are skipped.
 *
 * @param <T> type of the items in the sketches
 * @param bag A bag of sketchTuples.
 * @param union The union to update
 * @param comparator comparator handed to {@code ItemsSketch.getInstance} for each sketch
 * @param serDe serializer/deserializer handed to {@code ItemsSketch.getInstance} for each sketch
 * @throws ExecException as declared; propagated from reading a tuple's field or field type
 * @throws IllegalArgumentException if a non-null field 0 is not a DataByteArray
 */
private static <T> void updateUnion(final DataBag bag, final ItemsUnion<T> union,
    final Comparator<T> comparator, final ArrayOfItemsSerDe<T> serDe) throws ExecException {
  for (final Tuple sketchTuple : bag) {
    final Object field = sketchTuple.get(0);
    if (field == null) {
      continue;
    }
    if (!(field instanceof DataByteArray)) {
      throw new IllegalArgumentException(
          "Field type was not DataType.BYTEARRAY: " + sketchTuple.getType(0));
    }
    final DataByteArray serialized = (DataByteArray) field;
    if (serialized.size() <= 0) {
      continue;
    }
    final byte[] sketchBytes = serialized.get();
    union.update(ItemsSketch.getInstance(Memory.wrap(sketchBytes), comparator, serDe));
  }
}
示例8: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Heapifies the DoubleSummary sketch in field 0, feeds the summary value of every entry the
 * sketch iterator yields into a quantiles sketch, and returns the quantile for the
 * percentile given in field 1.
 *
 * @param input tuple with exactly two fields: the serialized sketch (cast to DataByteArray)
 *     and the percentile (cast to double)
 * @return the result of {@code getQuantile(percentile / 100)} on the quantiles sketch
 * @throws IllegalArgumentException if the tuple does not have exactly two fields, or if the
 *     percentile is NaN or outside the range 0 to 100
 * @throws IOException as declared by the overridden method
 */
@Override
public Double exec(final Tuple input) throws IOException {
  if (input.size() != 2) {
    throw new IllegalArgumentException("expected two inputs: sketch and percentile");
  }

  final DataByteArray dba = (DataByteArray) input.get(0);
  final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(Memory.wrap(dba.get()));

  final double percentile = (double) input.get(1);
  // Written as a negated in-range test: every comparison with NaN is false, so the form
  // (p < 0) || (p > 100) would let NaN through to getQuantile.
  if (!((percentile >= 0) && (percentile <= 100))) {
    throw new IllegalArgumentException("percentile must be between 0 and 100");
  }

  final UpdateDoublesSketch qs = DoublesSketch.builder().setK(QUANTILES_SKETCH_SIZE).build();
  final SketchIterator<DoubleSummary> it = sketch.iterator();
  while (it.next()) {
    qs.update(it.getSummary().getValue());
  }
  return qs.getQuantile(percentile / 100);
}
示例9: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Wraps the ArrayOfDoublesSketch held in field 0 and produces its estimate followed by one
 * value per column: the sum of that column over all retained entries, divided by theta.
 *
 * @param input tuple whose field 0 is cast to a DataByteArray containing the serialized sketch
 * @return a tuple of {@code getNumValues() + 1} doubles (slot 0 is {@code getEstimate()}),
 *     or null if the input is null or empty
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if (input == null || input.size() == 0) {
    return null;
  }

  final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
  final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketchBytes));

  final int numValues = sketch.getNumValues();
  final double[] result = new double[numValues + 1];
  result[0] = sketch.getEstimate();

  // this guard becomes unnecessary once the sketches-core version is above 0.4.0
  if (sketch.getRetainedEntries() > 0) {
    for (final ArrayOfDoublesSketchIterator entries = sketch.iterator(); entries.next(); ) {
      final double[] entryValues = entries.getValues();
      for (int column = 0; column < numValues; column++) {
        result[column + 1] += entryValues[column];
      }
    }
    final double theta = sketch.getTheta();
    for (int slot = 1; slot <= numValues; slot++) {
      result[slot] /= theta;
    }
  }
  return Util.doubleArrayToTuple(result);
}
示例10: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Builds a new sketch, updates it from the bag in field 0, and returns the compact form
 * serialized into a DataByteArray wrapped in a tuple.
 *
 * @param inputTuple tuple expected to contain exactly one field, which is cast to a DataBag
 * @return a tuple holding the serialized compact sketch, or null if the input is null or empty
 * @throws IllegalArgumentException if the tuple has more than one field
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple inputTuple) throws IOException {
  // logged once so that the log shows which way Pig invoked this function
  if (isFirstCall_) {
    Logger.getLogger(getClass()).info("exec is used");
    isFirstCall_ = false;
  }

  final boolean nothingToProcess = (inputTuple == null) || (inputTuple.size() == 0);
  if (nothingToProcess) {
    return null;
  }
  if (inputTuple.size() != 1) {
    throw new IllegalArgumentException("Input tuple must have 1 bag");
  }

  final UpdatableSketch<U, S> updatable = sketchBuilder_.build();
  final DataBag inputs = (DataBag) inputTuple.get(0);
  updateSketch(inputs, updatable);

  final byte[] serialized = updatable.compact().toByteArray();
  return Util.tupleFactory.newTuple(new DataByteArray(serialized));
}
示例11: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Wraps the ArrayOfDoublesSketch held in field 0 and returns the variance of each of its
 * value columns, taken from the summary statistics computed for the sketch.
 *
 * @param input tuple whose field 0 is cast to a DataByteArray containing the serialized sketch
 * @return a tuple with one variance per value column, or null if the input is null or empty
 *     or the sketch retains no entries
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if (input == null || input.size() == 0) {
    return null;
  }

  final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
  final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketchBytes));
  if (sketch.getRetainedEntries() < 1) {
    return null;
  }

  final SummaryStatistics[] columnStats =
      ArrayOfDoublesSketchStats.sketchToSummaryStatistics(sketch);
  final int numValues = sketch.getNumValues();
  final Tuple result = TupleFactory.getInstance().newTuple(numValues);
  int column = 0;
  while (column < numValues) {
    result.set(column, columnStats[column].getVariance());
    column++;
  }
  return result;
}
示例12: exec
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Heapifies the DoubleSummary sketch held in field 0 and returns its estimate together with
 * the sum of all summary values divided by theta.
 *
 * @param input tuple whose field 0 is cast to a DataByteArray containing the serialized sketch
 * @return a 2-field tuple: {@code getEstimate()} and the summed summary values divided by
 *     {@code getTheta()}; null if the input is null or empty
 * @throws IOException as declared by the overridden method
 */
@Override
public Tuple exec(final Tuple input) throws IOException {
  if (input == null || input.size() == 0) {
    return null;
  }

  final byte[] sketchBytes = ((DataByteArray) input.get(0)).get();
  final Sketch<DoubleSummary> sketch = Sketches.heapifySketch(Memory.wrap(sketchBytes));

  final Tuple result = TupleFactory.getInstance().newTuple(2);
  result.set(0, sketch.getEstimate());

  double total = 0;
  for (final SketchIterator<DoubleSummary> entries = sketch.iterator(); entries.next(); ) {
    total += entries.getSummary().getValue();
  }
  result.set(1, total / sketch.getTheta());
  return result;
}
示例13: updateUnion
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Updates a union from a bag of sketches. Only field 0 of each inner tuple is used; tuples
 * for which the field or its type is extracted as null, and empty byte arrays, are skipped.
 *
 * @param bag A bag of sketchTuples.
 * @param union The union to update
 * @throws IllegalArgumentException if field 0 has a type other than DataType.BYTEARRAY
 */
private static void updateUnion(final DataBag bag, final com.yahoo.sketches.theta.Union union) {
  for (final Tuple sketchTuple : bag) {
    final Object field = extractFieldAtIndex(sketchTuple, 0);
    if (field != null) {
      final Byte fieldType = extractTypeAtIndex(sketchTuple, 0);
      if (fieldType != null) {
        if (fieldType != DataType.BYTEARRAY) {
          throw new IllegalArgumentException(
              "Field type was not DataType.BYTEARRAY: " + fieldType);
        }
        final DataByteArray serialized = (DataByteArray) field;
        if (serialized.size() > 0) {
          union.update(Memory.wrap(serialized.get()));
        }
      }
    }
  }
}
示例14: manyEntriesTwoValuesInputSketch
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Runs {@code ArrayOfDoublesSketchToMeans} on a two-value sketch fed with 10000 keys of
 * Gaussian data and checks the two returned values against 0.0 and 1.0.
 */
@Test
public void manyEntriesTwoValuesInputSketch() throws Exception {
  // 10000 keys saturate a sketch built with the default number of nominal entries (4K)
  final int keyCount = 10000;
  final Random gaussianSource = new Random(0);
  final EvalFunc<Tuple> udf = new ArrayOfDoublesSketchToMeans();
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();

  int key = 0;
  while (key < keyCount) {
    // first value is centred on 0, second is centred on 1
    final double aroundZero = gaussianSource.nextGaussian();
    final double aroundOne = gaussianSource.nextGaussian() + 1.0;
    updatable.update(key, new double[] {aroundZero, aroundOne});
    key++;
  }
  Assert.assertTrue(updatable.getRetainedEntries() >= 4096);

  final byte[] serialized = updatable.compact().toByteArray();
  final Tuple means = udf.exec(PigUtil.objectsToTuple(new DataByteArray(serialized)));

  Assert.assertNotNull(means);
  Assert.assertEquals(means.size(), 2);
  final double firstMean = (double) means.get(0);
  final double secondMean = (double) means.get(1);
  Assert.assertEquals(firstMean, 0.0, 0.04);
  Assert.assertEquals(secondMean, 1.0, 0.04);
}
示例15: algebraicFinal
import org.apache.pig.data.DataByteArray; //导入依赖的package包/类
/**
 * Passes a bag containing one serialized, freshly created VarOpt sketch to
 * {@code VarOptSampling.Final} and expects a non-null result bag of size 0.
 */
@Test
public void algebraicFinal() {
  final int sampleSize = 87;
  final int weightIndex = 2;

  // one tuple holding the serialized form of a sketch that has received no updates
  final VarOptItemsSketch<Tuple> emptySketch = VarOptItemsSketch.newInstance(sampleSize);
  final DataByteArray serialized = new DataByteArray(emptySketch.toByteArray(serDe_));
  final DataBag sketches = BagFactory.getInstance().newDefaultBag();
  sketches.add(TupleFactory.getInstance().newTuple(serialized));
  final Tuple udfInput = TupleFactory.getInstance().newTuple(sketches);

  final VarOptSampling.Final udf =
      new VarOptSampling.Final(Integer.toString(sampleSize), Integer.toString(weightIndex));
  try {
    final DataBag samples = udf.exec(udfInput);
    assertNotNull(samples);
    assertEquals(samples.size(), 0);
  } catch (final IOException e) {
    fail("Unexpected IOException");
  }
}