当前位置: 首页>>代码示例>>Java>>正文


Java TDigest类代码示例

本文整理汇总了Java中com.clearspring.analytics.stream.quantile.TDigest的典型用法代码示例。如果您正苦于以下问题:Java TDigest类的具体用法?Java TDigest怎么用?Java TDigest使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


TDigest类属于com.clearspring.analytics.stream.quantile包,在下文中一共展示了TDigest类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: HeaderAndQuantileDataHolder

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Constructor
 * 
 * @param header the header with summary attributes
 * @param quantileEstimators a map of TDigest quantile estimators keyed by
 *          attribute name
 */
public HeaderAndQuantileDataHolder(Instances header,
  Map<String, TDigest> quantileEstimators) {

  m_header = header;

  if (quantileEstimators != null && quantileEstimators.size() > 0) {
    m_encodedQuantileEstimators =
      new HashMap<String, byte[]>(quantileEstimators.size());
    for (Map.Entry<String, TDigest> q : quantileEstimators.entrySet()) {
      ByteBuffer buff = ByteBuffer.allocate(q.getValue().byteSize());
      q.getValue().asSmallBytes(buff);
      m_encodedQuantileEstimators.put(q.getKey(), buff.array());
    }
  }
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:23,代码来源:CSVToARFFHeaderMapTask.java

示例2: getQuantileEstimator

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Return a decoded TDigest quantile estimator
 * 
 * @param attributeName the name of the attribute to get the estimator for
 * @return the decoded estimator
 * @throws DistributedWekaException if there are no quantile estimators or
 *           the named one is not in the map
 */
public TDigest getQuantileEstimator(String attributeName)
  throws DistributedWekaException {
  if (m_encodedQuantileEstimators == null
    || m_encodedQuantileEstimators.isEmpty()) {
    throw new DistributedWekaException("No quantile estimators!");
  }

  byte[] encodedDigest = m_encodedQuantileEstimators.get(attributeName);
  if (encodedDigest == null) {
    throw new DistributedWekaException(
      "Can't find a quantile estimator for attribute '" + attributeName
        + "'");
  }

  // Reconstruct the TDigest from its serialized byte form
  return TDigest.fromBytes(ByteBuffer.wrap(encodedDigest));
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:29,代码来源:CSVToARFFHeaderMapTask.java

示例3: onEvent

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
@Override
public void onEvent(TwoPhaseEvent<Void> event) throws Exception {
    // Re-parse the raw input line into the reusable item instance
    item.clear();
    format.parse(event.input(), item, event.lineNo());

    int numericalCount = item.numericalIndexes().size();
    for (int pos = 0; pos < numericalCount; pos++) {
        long featureIndex = item.numericalIndexes().getLong(pos);
        double featureValue = item.numericalValues().getDouble(pos);

        // Lazily create a digest the first time a feature index is seen
        TDigest digest = digests.get(featureIndex);
        if (digest == null) {
            digest = new TDigest(100);
            digests.put(featureIndex, digest);
        }

        // Update running count / min / max for this feature
        counts.put(featureIndex, counts.get(featureIndex) + 1);
        minimums.put(featureIndex, Math.min(minimums.get(featureIndex), featureValue));
        maximums.put(featureIndex, Math.max(maximums.get(featureIndex), featureValue));
        digest.add(featureValue);
    }
}
 
开发者ID:scaled-ml,项目名称:Scaled-ML,代码行数:17,代码来源:StatisticsWorkHandler.java

示例4: testBuildBinning

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
@Test
public void testBuildBinning() {
    // Fill a digest with 1000 uniform random samples in [0, 1)
    TDigest digest = new TDigest(100);
    for (int sample = 0; sample < 1000; sample++) {
        digest.add(ThreadLocalRandom.current().nextDouble());
    }

    NumericalFeaturesStatistics statistics =
            new NumericalFeaturesStatistics().percentsHistogramStep(0.01);
    Binning binning = statistics.buildBinning(digest, 0.);

    // Exact expectations at the binning boundaries
    assertEquals(0, binning.getInsertionPoint(0.));
    assertEquals(-1, binning.getInsertionPoint(-0.1));
    assertEquals(99, binning.getInsertionPoint(1.));
    assertEquals(99, binning.getInsertionPoint(Double.MAX_VALUE));

    // Quantile positions are approximate; allow a tolerance band
    int middleInsertion = binning.getInsertionPoint(0.5);
    assertTrue("middleInsertion is " + middleInsertion,
            middleInsertion > 45 && middleInsertion < 55);
    int quarterInsertion = binning.getInsertionPoint(0.25);
    assertTrue("quarterInsertion is " + quarterInsertion,
            quarterInsertion > 20 && quarterInsertion < 30);
    int thirdQuarterInsertion = binning.getInsertionPoint(0.75);
    assertTrue("thirdQuarterInsertion is " + thirdQuarterInsertion,
            thirdQuarterInsertion > 70 && thirdQuarterInsertion < 80);
}
 
开发者ID:scaled-ml,项目名称:Scaled-ML,代码行数:21,代码来源:NumericalFeaturesStatisticsTest.java

示例5: getHeaderAndQuantileEstimators

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Get the header information and the encoded quantile estimators
 *
 * @return a holder instance containing both the header information and
 *         encoded quantile estimators
 * @throws DistributedWekaException if we are not computing summary statistics
 *           or we are computing statistics but not quantiles
 */
public HeaderAndQuantileDataHolder getHeaderAndQuantileEstimators()
  throws DistributedWekaException {
  if (!m_computeSummaryStats) {
    throw new DistributedWekaException("No summary stats computed!");
  }
  if (!m_estimateQuantiles) {
    throw new DistributedWekaException("No quantile information computed!");
  }

  // Collect the TDigest estimator (when present) for every numeric/date
  // attribute, keyed by attribute name
  Map<String, TDigest> estimatorsByName = new HashMap<String, TDigest>();
  for (int i = 0; i < m_attributeTypes.length; i++) {
    boolean numericOrDate =
      m_attributeTypes[i] == TYPE.NUMERIC || m_attributeTypes[i] == TYPE.DATE;
    if (!numericOrDate) {
      continue;
    }

    String attName = m_attributeNames.get(i);
    NumericStats stats = (NumericStats) m_summaryStats.get(attName);
    if (stats.getQuantileEstimator() != null) {
      estimatorsByName.put(attName, stats.getQuantileEstimator());
    }
  }

  return new HeaderAndQuantileDataHolder(getHeader(), estimatorsByName);
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:36,代码来源:CSVToARFFHeaderMapTask.java

示例6: deSerializeCurrentQuantileEstimator

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Decode the current TDigest quantile estimator from its serialized byte
 * form; a no-op when there is nothing to decode.
 */
public void deSerializeCurrentQuantileEstimator() {
  if (m_encodedTDigestEstimator == null) {
    return;
  }
  // Rebuild the estimator, then release the encoded bytes
  m_quantileEstimator =
    TDigest.fromBytes(ByteBuffer.wrap(m_encodedTDigestEstimator));
  m_encodedTDigestEstimator = null;
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:11,代码来源:NumericStats.java

示例7: buildBinning

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Build a fresh binning for every feature digest accumulated so far.
 */
private synchronized Long2ObjectMap<Binning> buildBinning() {
    Long2ObjectMap<Binning> result = new Long2ObjectLinkedOpenHashMap<>();
    for (long featureIndex : digests.keySet()) {
        // Each feature's binning starts at that feature's observed minimum
        Binning featureBinning =
                buildBinning(digests.get(featureIndex), minimums.get(featureIndex));
        result.put(featureIndex, featureBinning);
    }
    return result;
}
 
开发者ID:scaled-ml,项目名称:Scaled-ML,代码行数:11,代码来源:NumericalFeaturesStatistics.java

示例8: combine

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Performs a "combine" operation using the supplied partial
 * CSVToARFFHeaderMapTask tasks. This is essentially a reduce operation, but
 * returns a single CSVToARFFHeaderMapTask object (rather than the final
 * header that is produced by CSVToARFFHeaderReduceTask). This allows several
 * reduce stages to be implemented (if desired) or partial reduces to occur in
 * parallel.
 *
 * @param tasks a list of CSVToARFFHeaderMapTasks to "combine"
 * @return a CSVToARFFHeaderMapTask with the merged state
 * @throws DistributedWekaException if a problem occurs
 */
public static CSVToARFFHeaderMapTask combine(
  List<CSVToARFFHeaderMapTask> tasks) throws DistributedWekaException {
  if (tasks == null || tasks.size() == 0) {
    throw new DistributedWekaException(
      "[CSVToARFFHeaderMapTask:combine] no tasks to combine!");
  }

  // Nothing to merge when there is only one partial task
  if (tasks.size() == 1) {
    return tasks.get(0);
  }

  CSVToARFFHeaderMapTask master = tasks.get(0);

  // Aggregate the headers of all partial tasks (this resolves any
  // attribute-type conflicts between partials)
  List<Instances> toCombine = new ArrayList<Instances>(tasks.size());
  for (CSVToARFFHeaderMapTask t : tasks) {
    toCombine.add(t.getHeader());
  }
  Instances combinedHeaders = CSVToARFFHeaderReduceTask.aggregate(toCombine);

  // Merge the per-attribute TDigest quantile estimators across all tasks
  Map<String, TDigest> mergedDigests = new HashMap<String, TDigest>();
  if (master.getComputeQuartilesAsPartOfSummaryStats()) {
    Instances headerNoSummary =
      CSVToARFFHeaderReduceTask.stripSummaryAtts(combinedHeaders);

    for (int i = 0; i < headerNoSummary.numAttributes(); i++) {
      String attName = headerNoSummary.attribute(i).name();

      List<TDigest> digestsToMerge = new ArrayList<TDigest>();
      for (CSVToARFFHeaderMapTask t : tasks) {
        Stats ns = t.m_summaryStats.get(attName);
        if (ns instanceof NumericStats) {
          TDigest partialEstimator =
            ((NumericStats) ns).getQuantileEstimator();
          if (partialEstimator != null) {
            digestsToMerge.add(partialEstimator);
          }
        }
      }

      if (digestsToMerge.size() > 0) {
        // Merge at the compression level of the first partial estimator
        TDigest mergedForAtt =
          TDigest.merge(digestsToMerge.get(0).compression(), digestsToMerge);
        mergedDigests.put(attName, mergedForAtt);
      }
    }
  }

  // need to re-construct master now that we've (potentially) resolved
  // type conflicts within this combine operation
  master.fromHeader(combinedHeaders, mergedDigests);

  return master;
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:73,代码来源:CSVToARFFHeaderMapTask.java

示例9: testProcessRoundTripWithQuantiles

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Round-trip test: compute quantile estimators over the IRIS data,
 * serialize them via a holder, reconstruct a fresh map task from the
 * aggregated header, and verify the quartile summary metrics survive the
 * round trip.
 *
 * @throws Exception if a problem occurs
 */
@Test
public void testProcessRoundTripWithQuantiles() throws Exception {
  CSVToARFFHeaderMapTask task = new CSVToARFFHeaderMapTask();
  task.setComputeQuartilesAsPartOfSummaryStats(true);
  task.setCompressionLevelForQuartileEstimation(80.0);

  List<String> attNames = new ArrayList<String>();
  // try-with-resources ensures the reader is closed even if a row fails
  try (BufferedReader br = new BufferedReader(new StringReader(IRIS))) {
    String line = br.readLine();
    for (String s : line.split(",")) {
      attNames.add(s);
    }

    while ((line = br.readLine()) != null) {
      task.processRow(line, attNames);
    }
  }

  HeaderAndQuantileDataHolder holder = task.getHeaderAndQuantileEstimators();
  List<HeaderAndQuantileDataHolder> holderList =
    new ArrayList<HeaderAndQuantileDataHolder>();
  holderList.add(holder);
  Instances header =
    CSVToARFFHeaderReduceTask.aggregateHeadersAndQuartiles(holderList);

  Map<String, TDigest> estimators = new HashMap<String, TDigest>();
  Instances headerNoSummary =
    CSVToARFFHeaderReduceTask.stripSummaryAtts(header);
  // Index headerNoSummary consistently here: the original mixed
  // header.attribute(i) and headerNoSummary.attribute(i), which only
  // worked because non-summary attributes happen to come first in header
  for (int i = 0; i < headerNoSummary.numAttributes(); i++) {
    if (headerNoSummary.attribute(i).isNumeric()) {
      estimators.put(headerNoSummary.attribute(i).name(),
        holder.getQuantileEstimator(headerNoSummary.attribute(i).name()));
    }
  }

  CSVToARFFHeaderMapTask fresh = new CSVToARFFHeaderMapTask();
  fresh.setComputeQuartilesAsPartOfSummaryStats(true);
  fresh.setCompressionLevelForQuartileEstimation(80.0);
  fresh.fromHeader(header, estimators);
  holderList.clear();
  holder = fresh.getHeaderAndQuantileEstimators();
  holderList.add(holder);
  Instances freshHeader =
    CSVToARFFHeaderReduceTask.aggregateHeadersAndQuartiles(holderList);

  // check a few quantiles
  Attribute origSepallength =
    header.attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX
      + "sepallength");
  Attribute freshSepallength =
    freshHeader
      .attribute(CSVToARFFHeaderMapTask.ARFF_SUMMARY_ATTRIBUTE_PREFIX
        + "sepallength");

  assertEquals(
    ArffSummaryNumericMetric.FIRSTQUARTILE.valueFromAttribute(origSepallength),
    ArffSummaryNumericMetric.FIRSTQUARTILE
      .valueFromAttribute(freshSepallength),
    0.000001);

  assertEquals(
    ArffSummaryNumericMetric.THIRDQUARTILE.valueFromAttribute(origSepallength),
    ArffSummaryNumericMetric.THIRDQUARTILE
      .valueFromAttribute(freshSepallength),
    0.000001);

  assertEquals(
    ArffSummaryNumericMetric.MEDIAN.valueFromAttribute(origSepallength),
    ArffSummaryNumericMetric.MEDIAN
      .valueFromAttribute(freshSepallength),
    0.000001);
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:76,代码来源:CSVToARFFHeaderMapTaskTest.java

示例10: digests

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
// Accessor for the per-feature TDigest estimators accumulated so far
// (returns the live internal map, not a copy).
public Long2ObjectMap<TDigest> digests() {
    return digests;
}
 
开发者ID:scaled-ml,项目名称:Scaled-ML,代码行数:4,代码来源:StatisticsWorkHandler.java

示例11: runAlgorithm

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Create a fresh TDigest estimator and feed it every sample.
 *
 * @param numbers the samples to add to the estimator
 */
protected void runAlgorithm(long[] numbers) {
  tdigest = new TDigest(COMPRESSION_FACTOR, new Random());
  for (long number : numbers) {
    tdigest.add(number);
  }
}
 
开发者ID:DemandCube,项目名称:NeverwinterDP-Commons,代码行数:7,代码来源:AlgorithmComparatorUnitTest.java

示例12: getQuantileEstimator

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Get the quantile estimator in use (if any)
 * 
 * @return the quantile estimator (may be null if none has been set or
 *         decoded yet)
 */
public TDigest getQuantileEstimator() {
  return m_quantileEstimator;
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:NumericStats.java

示例13: setQuantileEstimator

import com.clearspring.analytics.stream.quantile.TDigest; //导入依赖的package包/类
/**
 * Set the quantile estimator to use, replacing any previously set or
 * decoded estimator
 * 
 * @param estimator the estimator to use
 */
public void setQuantileEstimator(TDigest estimator) {
  m_quantileEstimator = estimator;
}
 
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:NumericStats.java


注:本文中的com.clearspring.analytics.stream.quantile.TDigest类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。