Java PTypeFamily類代碼示例

本文整理匯總了Java中org.apache.crunch.types.PTypeFamily類的典型用法代碼示例。如果您正苦於以下問題：Java PTypeFamily類的具體用法？Java PTypeFamily怎麼用？Java PTypeFamily使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。

PTypeFamily類屬於org.apache.crunch.types包，在下文中一共展示了PTypeFamily類的9個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: lloydsAlgorithm

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Refines a set of cluster centers by running Lloyd's algorithm over the given points for a fixed
 * number of iterations, and returns the centers produced by the last iteration.
 *
 * @param points The data points to cluster
 * @param centers The list of initial centers
 * @param numIterations The number of iterations to run, with each iteration corresponding to a MapReduce job
 * @param approx Whether to use random projection for assigning points to centers
 * @param <V> The vector type of the data points
 * @return The centers after the final iteration; the initial centers if {@code numIterations} is not positive
 */
public <V extends RealVector> List<Centers> lloydsAlgorithm(PCollection<V> points, List<Centers> centers,
    int numIterations, boolean approx) {
  PTypeFamily family = points.getTypeFamily();
  // Keys are integer pairs and values pair a vector with a long, all built from the input's type family.
  PTableType<Pair<Integer, Integer>, Pair<V, Long>> tableType = family.tableOf(
      family.pairs(family.ints(), family.ints()),
      family.pairs(points.getPType(), family.longs()));
  // One aggregator instance is shared by every iteration.
  Aggregator<Pair<V, Long>> vectorSum = new SumVectorsAggregator<V>();

  List<Centers> current = centers;
  int iteration = 0;
  while (iteration < numIterations) {
    // The index is rebuilt from the centers produced by the previous iteration.
    LloydsMapFn<V> assignFn = new LloydsMapFn<V>(
        new KSketchIndex(current, projectionBits, projectionSamples, seed), approx);
    LloydsCenters<V> updated = new LloydsCenters<V>(
        points.parallelDo("lloyds-" + iteration, assignFn, tableType)
            .groupByKey()
            .combineValues(vectorSum),
        current.size());
    current = updated.getValue();
    iteration++;
  }
  return current;
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:25，代碼來源:KMeansParallel.java

示例2: apply

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Pairs every element of the input collection with an integer obtained from
 * {@code nextInt(numFolds)} on a seeded random generator.
 *
 * <p>{@code seed} and {@code numFolds} come from the enclosing scope, which is not shown here.
 *
 * @param pcollect the collection whose elements are to be tagged
 * @param <T> element type of the collection
 * @return a collection of (integer, element) pairs built with the input's type family
 */
public <T> PCollection<Pair<Integer, T>> apply(PCollection<T> pcollect) {
  PTypeFamily family = pcollect.getTypeFamily();
  PType<Pair<Integer, T>> foldedType = family.pairs(family.ints(), pcollect.getPType());

  MapFn<T, Pair<Integer, T>> assignFold = new MapFn<T, Pair<Integer, T>>() {
    // Transient: the generator is created in initialize() rather than stored with the function.
    private transient RandomGenerator generator;

    @Override
    public void initialize() {
      if (generator != null) {
        return;
      }
      generator = RandomManager.getSeededRandom(seed);
    }

    @Override
    public Pair<Integer, T> map(T element) {
      int fold = generator.nextInt(numFolds);
      return Pair.of(fold, element);
    }
  };

  return pcollect.parallelDo("crossfold", assignFold, foldedType);
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:21，代碼來源:Crossfold.java

示例3: groupedWeightedSample

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Samples the values of a table on a per-key basis and returns the sampled values without their
 * sampling weights.
 *
 * <p>The sampling itself is delegated to {@code SampleFn} and {@code WRSCombineFn}, which are
 * defined elsewhere; per the intent of this method they keep no more than {@code sampleSize}
 * values for each key.
 *
 * @param input table whose values pair an element with a numeric value (presumably its weight)
 * @param sampleSize the sample size handed to the sampling and combining functions
 * @param random random generator handed to the sampling function
 * @param <K> key type of the table
 * @param <T> type of the sampled elements
 * @param <N> numeric type paired with each element
 * @return a table from each key to its sampled elements
 */
public static <K, T, N extends Number> PTable<K, T> groupedWeightedSample(
    PTable<K, Pair<T, N>> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily family = input.getTypeFamily();
  PType<K> keyType = input.getPTableType().getKeyType();
  // The table's value type is a pair type; its first sub-type is the element type.
  @SuppressWarnings("unchecked")
  PType<T> elementType = (PType<T>) input.getPTableType().getValueType().getSubTypes().get(0);
  PTableType<K, Pair<Double, T>> weightedType =
      family.tableOf(keyType, family.pairs(family.doubles(), elementType));

  // First pass: map over the input and re-emit candidates, each tagged with a double weight.
  // Every map task emits at most sampleSize candidates per key; the combine step below trims further.
  PTable<K, Pair<Double, T>> samples = input.parallelDo(
      "reservoirSampling",
      new SampleFn<K, T, N>(sampleSize, random, elementType),
      weightedType);

  // Second pass: merge the candidates of each key into one reservoir of sampleSize elements.
  PTable<K, Pair<Double, T>> reservoir = samples
      .groupByKey(1)
      .combineValues(new WRSCombineFn<K, T>(sampleSize, elementType));

  // Final pass: drop the weight and keep only (key, element).
  MapFn<Pair<K, Pair<Double, T>>, Pair<K, T>> dropWeight =
      new MapFn<Pair<K, Pair<Double, T>>, Pair<K, T>>() {
        @Override
        public Pair<K, T> map(Pair<K, Pair<Double, T>> entry) {
          return Pair.of(entry.first(), entry.second().second());
        }
      };
  return reservoir.parallelDo("strippingSamplingWeights", dropWeight, family.tableOf(keyType, elementType));
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:27，代碼來源:ReservoirSampling.java

示例4: swapKeyValue

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Exchanges the key and the value of every entry in a PTable. The table's existing key and value
 * PTypes are reused, in the opposite order, for the result.
 *
 * @param table PTable to process
 * @param <K> Key type of the input (becomes the value type of the result)
 * @param <V> Value type of the input (becomes the key type of the result)
 * @return PTable&lt;V, K&gt; containing the same data as the original
 */
public static <K, V> PTable<V, K> swapKeyValue(PTable<K, V> table) {
  PTypeFamily family = table.getTypeFamily();
  MapFn<Pair<K, V>, Pair<V, K>> swapper = new MapFn<Pair<K, V>, Pair<V, K>>() {
    @Override
    public Pair<V, K> map(Pair<K, V> input) {
      V newKey = input.second();
      K newValue = input.first();
      return Pair.of(newKey, newValue);
    }
  };
  return table.parallelDo(swapper, family.tableOf(table.getValueType(), table.getKeyType()));
}

開發者ID:spotify，項目名稱:crunch-lib，代碼行數:17，代碼來源:SPTables.java

示例5: sample

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Samples a collection by pairing every element with the integer 1 and delegating to
 * {@code weightedSample}.
 *
 * @param input the collection to sample from
 * @param sampleSize the sample size passed through to {@code weightedSample}
 * @param random the random generator passed through to {@code weightedSample}
 * @param <T> element type of the collection
 * @return the collection returned by {@code weightedSample}
 */
public static <T> PCollection<T> sample(
    PCollection<T> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily family = input.getTypeFamily();
  PType<Pair<T, Integer>> unitWeightType = family.pairs(input.getPType(), family.ints());

  // Give every element the same value, 1, as the second component of its pair.
  MapFn<T, Pair<T, Integer>> attachUnitWeight = new MapFn<T, Pair<T, Integer>>() {
    @Override
    public Pair<T, Integer> map(T element) {
      return Pair.of(element, 1);
    }
  };
  PCollection<Pair<T, Integer>> unitWeighted = input.parallelDo(attachUnitWeight, unitWeightType);

  return weightedSample(unitWeighted, sampleSize, random);
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:15，代碼來源:ReservoirSampling.java

示例6: weightedSample

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Samples a collection of (element, number) pairs by placing every pair under the single key 0
 * and delegating to {@code groupedWeightedSample}; the values of the resulting table are returned.
 *
 * @param input collection of pairs of an element and a numeric value (presumably its weight)
 * @param sampleSize the sample size passed through to {@code groupedWeightedSample}
 * @param random the random generator passed through to {@code groupedWeightedSample}
 * @param <T> type of the sampled elements
 * @param <N> numeric type paired with each element
 * @return the values of the table returned by {@code groupedWeightedSample}
 */
public static <T, N extends Number> PCollection<T> weightedSample(
    PCollection<Pair<T, N>> input,
    int sampleSize,
    RandomGenerator random) {
  PTypeFamily family = input.getTypeFamily();

  // Every pair gets the same constant key, so the whole input forms one group.
  MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>> keyByZero =
      new MapFn<Pair<T, N>, Pair<Integer, Pair<T, N>>>() {
        @Override
        public Pair<Integer, Pair<T, N>> map(Pair<T, N> weighted) {
          return Pair.of(0, weighted);
        }
      };
  PTable<Integer, Pair<T, N>> singleGroup =
      input.parallelDo(keyByZero, family.tableOf(family.ints(), input.getPType()));

  PTable<Integer, T> sampled = groupedWeightedSample(singleGroup, sampleSize, random);
  return sampled.values();
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:15，代碼來源:ReservoirSampling.java

示例7: distributed

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Computes a set of percentiles for each key of a numerically-valued table.
 *
 * Percentiles are computed per key by counting, joining and sorting. This is highly scalable, but
 * takes 2 more map-reduce cycles than the in-memory approach, which can be used when the value set
 * of each key is guaranteed to fit into memory. Use inMemory if you have fewer than the order of
 * 10M values per key.
 *
 * The percentile definition used here is the "nearest rank" one described at:
 * http://en.wikipedia.org/wiki/Percentile#Definition
 *
 * @param table numerically-valued PTable
 * @param p1 First percentile (in the range 0.0 - 1.0)
 * @param pn More percentiles (in the range 0.0 - 1.0)
 * @param <K> Key type of the table
 * @param <V> Value type of the table (must extend java.lang.Number)
 * @return PTable of each key with a collection of pairs of the percentile provided and its result.
 */
public static <K, V extends Number> PTable<K, Result<V>> distributed(PTable<K, V> table,
        double p1, double... pn) {
  List<Double> requested = createListFromVarargs(p1, pn);
  PTypeFamily family = table.getTypeFamily();

  // Count the entries of each key, then join that count onto every value of the key.
  PTable<K, Pair<Long, V>> countThenValue = table.keys().count().join(table);

  // Flip each pair to (value, count) before handing it to the secondary sort.
  PTable<K, Pair<V, Long>> valueThenCount = countThenValue.mapValues(
          new SwapPairComponents<Long, V>(),
          family.pairs(table.getValueType(), family.longs()));

  DistributedPercentiles<K, V> percentileFn = new DistributedPercentiles<K, V>(requested);
  return SecondarySort.sortAndApply(
          valueThenCount,
          percentileFn,
          family.tableOf(table.getKeyType(), Result.pType(table.getValueType())));
}

開發者ID:spotify，項目名稱:crunch-lib，代碼行數:34，代碼來源:Percentiles.java

示例8: csvRecord

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Builds a derived {@code PType} for {@code Record} on top of the family's string type.
 *
 * <p>The two conversion functions are both constructed with {@code delim}; judging by their
 * names they convert delimited text to a {@code Record} and back, but they are defined elsewhere.
 *
 * @param ptf the type family that supplies the string type and creates the derived type
 * @param delim the delimiter handed to both conversion functions
 * @return the derived {@code PType<Record>}
 */
public static PType<Record> csvRecord(PTypeFamily ptf, String delim) {
  CSV2RecordMapFn fromCsv = new CSV2RecordMapFn(delim);
  Record2CSVMapFn toCsv = new Record2CSVMapFn(delim);
  return ptf.derived(Record.class, fromCsv, toCsv, ptf.strings());
}

開發者ID:apsaltis，項目名稱:oryx，代碼行數:7，代碼來源:MLRecords.java

示例9: inMemory

import org.apache.crunch.types.PTypeFamily; //導入依賴的package包/類
/**
 * Computes a set of percentiles for each key of a table with comparable values.
 *
 * Percentiles are computed per key by grouping, reading the data into memory, then sorting and
 * calculating. This is much faster than the distributed option, but once a key reaches the order
 * of 10M+ values, performance might start to degrade or even cause OOMs.
 *
 * The percentile definition used here is the "nearest rank" one described at:
 * http://en.wikipedia.org/wiki/Percentile#Definition
 *
 * @param table PTable whose values can be compared with one another
 * @param p1 First percentile (in the range 0.0 - 1.0)
 * @param pn More percentiles (in the range 0.0 - 1.0)
 * @param <K> Key type of the table
 * @param <V> Value type of the table (must implement java.lang.Comparable)
 * @return PTable of each key with a collection of pairs of the percentile provided and its result.
 */
public static <K, V extends Comparable> PTable<K, Result<V>> inMemory(PTable<K, V> table,
        double p1, double... pn) {
  List<Double> requested = createListFromVarargs(p1, pn);
  PTypeFamily family = table.getTypeFamily();

  // Group by key, then let InMemoryPercentiles process each key's values.
  return table.groupByKey().parallelDo(
          new InMemoryPercentiles<K, V>(requested),
          family.tableOf(table.getKeyType(), Result.pType(table.getValueType())));
}

開發者ID:spotify，項目名稱:crunch-lib，代碼行數:29，代碼來源:Percentiles.java

注：本文中的org.apache.crunch.types.PTypeFamily類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。