本文整理汇总了Java中org.apache.crunch.PCollection.getTypeFamily方法的典型用法代码示例。如果您正苦于以下问题:Java PCollection.getTypeFamily方法的具体用法?Java PCollection.getTypeFamily怎么用?Java PCollection.getTypeFamily使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.crunch.PCollection的用法示例。
在下文中一共展示了PCollection.getTypeFamily方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: lloydsAlgorithm
import org.apache.crunch.PCollection; //导入方法依赖的package包/类
/**
 * Runs Lloyd's algorithm over the given points for a fixed number of iterations and returns the
 * centers produced by the last iteration.
 *
 * <p>Every iteration builds a fresh {@code KSketchIndex} from the current centers, maps the points
 * through a {@code LloydsMapFn}, groups and combines the results with a
 * {@code SumVectorsAggregator}, and replaces the current centers with the value of the resulting
 * {@code LloydsCenters}.
 *
 * @param points The data points to cluster
 * @param centers The list of initial centers; returned as-is when {@code numIterations} is not
 *     positive
 * @param numIterations The number of iterations to run, with each iteration corresponding to a
 *     MapReduce job
 * @param approx Whether to use random projection for assigning points to centers
 * @return the centers obtained after the final iteration
 */
public <V extends RealVector> List<Centers> lloydsAlgorithm(PCollection<V> points, List<Centers> centers,
    int numIterations, boolean approx) {
  PTypeFamily typeFamily = points.getTypeFamily();
  // Table type of the map output: (Integer, Integer) keys with (vector, Long) values.
  PTableType<Pair<Integer, Integer>, Pair<V, Long>> tableType = typeFamily.tableOf(
      typeFamily.pairs(typeFamily.ints(), typeFamily.ints()),
      typeFamily.pairs(points.getPType(), typeFamily.longs()));
  Aggregator<Pair<V, Long>> vectorSummer = new SumVectorsAggregator<V>();

  List<Centers> current = centers;
  int iteration = 0;
  while (iteration < numIterations) {
    // The index is rebuilt every pass because it depends on the centers of the previous pass.
    KSketchIndex sketchIndex = new KSketchIndex(current, projectionBits, projectionSamples, seed);
    LloydsMapFn<V> assignFn = new LloydsMapFn<V>(sketchIndex, approx);
    LloydsCenters<V> updated = new LloydsCenters<V>(
        points.parallelDo("lloyds-" + iteration, assignFn, tableType)
            .groupByKey()
            .combineValues(vectorSummer),
        current.size());
    current = updated.getValue();
    iteration++;
  }
  return current;
}
示例2: apply
import org.apache.crunch.PCollection; //导入方法依赖的package包/类
/**
 * Pairs every element of the input with an integer drawn from a seeded random generator via
 * {@code nextInt(numFolds)}, in a single {@code parallelDo} step named "crossfold".
 *
 * @param pcollect the collection whose elements should be tagged
 * @return a collection of (drawn integer, element) pairs
 */
public <T> PCollection<Pair<Integer, T>> apply(PCollection<T> pcollect) {
  PTypeFamily typeFamily = pcollect.getTypeFamily();
  PType<Pair<Integer, T>> foldedType = typeFamily.pairs(typeFamily.ints(), pcollect.getPType());

  MapFn<T, Pair<Integer, T>> foldAssigner = new MapFn<T, Pair<Integer, T>>() {
    // Transient: the generator is created in initialize() instead of travelling with the function.
    private transient RandomGenerator rand;

    @Override
    public void initialize() {
      if (rand != null) {
        return; // keep the generator that is already in place
      }
      rand = RandomManager.getSeededRandom(seed);
    }

    @Override
    public Pair<Integer, T> map(T t) {
      int fold = rand.nextInt(numFolds);
      return Pair.of(fold, t);
    }
  };

  return pcollect.parallelDo("crossfold", foldAssigner, foldedType);
}
示例3: sample
import org.apache.crunch.PCollection; //导入方法依赖的package包/类
/**
 * Samples from the input by attaching a weight of 1 to every element and delegating to
 * {@code weightedSample}.
 *
 * @param input the collection to sample from
 * @param sampleSize passed through to {@code weightedSample}
 * @param random passed through to {@code weightedSample}
 * @return whatever {@code weightedSample} returns for the unit-weighted input
 */
public static <T> PCollection<T> sample(
    PCollection<T> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily typeFamily = input.getTypeFamily();
  PType<Pair<T, Integer>> weightedType = typeFamily.pairs(input.getPType(), typeFamily.ints());

  // Give every element the same weight so that the weighted sampler treats them alike.
  MapFn<T, Pair<T, Integer>> unitWeightFn = new MapFn<T, Pair<T, Integer>>() {
    @Override
    public Pair<T, Integer> map(T element) {
      return Pair.of(element, 1);
    }
  };

  PCollection<Pair<T, Integer>> weighted = input.parallelDo(unitWeightFn, weightedType);
  return weightedSample(weighted, sampleSize, random);
}
示例4: weightedSample
import org.apache.crunch.PCollection; //导入方法依赖的package包/类
/**
 * Samples from a collection of (element, weight) pairs by placing every pair under the single
 * key 0 and delegating to {@code groupedWeightedSample}, then dropping the key again.
 *
 * @param input the (element, weight) pairs to sample from
 * @param sampleSize passed through to {@code groupedWeightedSample}
 * @param random passed through to {@code groupedWeightedSample}
 * @return the values of the table returned by {@code groupedWeightedSample}
 */
public static <T, N extends Number> PCollection<T> weightedSample(
    PCollection<Pair<T, N>> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily typeFamily = input.getTypeFamily();

  // Every pair gets the same constant key, so the whole input forms one group.
  MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>> singleGroupFn =
      new MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>>() {
        @Override
        public Pair<Integer, Pair<T, N>> map(Pair<T, N> weighted) {
          return Pair.of(0, weighted);
        }
      };

  PTable<Integer, Pair<T, N>> singleGroup = input.parallelDo(
      singleGroupFn, typeFamily.tableOf(typeFamily.ints(), input.getPType()));
  return groupedWeightedSample(singleGroup, sampleSize, random).values();
}