

Java Datum Class Code Examples

This article collects typical usage examples of the Java class edu.stanford.nlp.ling.Datum. If you are unsure what the Datum class is for, or how it is used in practice, the selected examples below should help.


The Datum class belongs to the edu.stanford.nlp.ling package. The sections below present 14 code examples of the class, ordered roughly by popularity.
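
Before diving into the examples: Datum<L, F> is Stanford CoreNLP's basic interface for a single labeled instance, where L is the label type and F is the feature type. The two implementations that appear most often below are BasicDatum (categorical, presence-style features) and RVFDatum (real-valued features backed by a Counter). The minimal sketch below, using invented feature and label strings, shows how both are constructed.

import java.util.Arrays;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class DatumSketch {
  public static void main(String[] args) {
    // Categorical (presence-style) features with a String label.
    Datum<String, String> basic =
        new BasicDatum<String, String>(Arrays.asList("word=cheddar", "suffix=ar"), "CHEESE");

    // Real-valued features: each feature carries a weight.
    Counter<String> counts = new ClassicCounter<String>();
    counts.incrementCount("length", 7.0);
    counts.incrementCount("capitalized", 1.0);
    Datum<String, String> rvf = new RVFDatum<String, String>(counts, "CHEESE");

    System.out.println(basic.label() + " " + basic.asFeatures());
    System.out.println(rvf.label() + " " + rvf.asFeatures());
  }
}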

Example 1: trainRVF

import edu.stanford.nlp.ling.Datum; // import the required package/class
public LinearClassifier<String, String> trainRVF(List<HashMap<String, Double>> list_feature2values,
		List<String> list_labels) {
	List<Datum<String, String>> trainingData = new ArrayList<Datum<String, String>>();
	for (int i = 0; i < list_feature2values.size(); i++) {
		HashMap<String, Double> feature2values = list_feature2values.get(i);
		String label = list_labels.get(i);
		Datum<String, String> d = new RVFDatum(Counters.fromMap(feature2values), label);
		trainingData.add(d);
	}
	// Build a classifier factory
	LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
	factory.setSigma(3);
	factory.setEpsilon(15);
	factory.useQuasiNewton();
	factory.setVerbose(true);
	LinearClassifier<String, String> classifier = factory.trainClassifier(trainingData);
	// {
	// ArrayList<String> temp = new ArrayList<String>();
	// temp.add("NS=" + GREEN);
	// System.out.println(classifier.scoreOf(new BasicDatum<String,
	// String>(temp, BROKEN), BROKEN));
	// }

	this.classifier = classifier;
	return classifier;
}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 27, Source: StanfordRegression.java
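
A hedged sketch of how trainRVF above might be invoked is shown below; the feature names, the labels, and the assumption that StanfordRegression has a no-argument constructor are all invented for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class TrainRVFSketch {
  public static void main(String[] args) {
    // Two toy instances, each a map from feature name to real value.
    List<HashMap<String, Double>> featureMaps = new ArrayList<HashMap<String, Double>>();
    HashMap<String, Double> f1 = new HashMap<String, Double>();
    f1.put("length", 7.0);
    f1.put("capitalized", 1.0);
    featureMaps.add(f1);
    HashMap<String, Double> f2 = new HashMap<String, Double>();
    f2.put("length", 3.0);
    featureMaps.add(f2);
    List<String> labels = Arrays.asList("CHEESE", "DISEASE");

    StanfordRegression regression = new StanfordRegression(); // hypothetical no-arg constructor
    LinearClassifier<String, String> classifier = regression.trainRVF(featureMaps, labels);

    // Classify a new real-valued datum; the label slot is left empty at test time.
    Counter<String> query = new ClassicCounter<String>();
    query.incrementCount("length", 6.0);
    System.out.println(classifier.classOf(new RVFDatum<String, String>(query, "")));
  }
}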

Example 2: score

import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> double score(ProbabilisticClassifier<L,F> classifier, GeneralDataset<L,F> data) {

    ArrayList<Pair<Double, Integer>> dataScores = new ArrayList<Pair<Double, Integer>>();
    for (int i = 0; i < data.size(); i++) {
      Datum<L,F> d = data.getRVFDatum(i);
      Counter<L> scores = classifier.logProbabilityOf(d);
      int labelD = d.label().equals(posLabel) ? 1 : 0;
      dataScores.add(new Pair<Double, Integer>(Math.exp(scores.getCount(posLabel)), labelD));
    }

    PRCurve prc = new PRCurve(dataScores);

    confWeightedAccuracy = prc.cwa();
    accuracy = prc.accuracy();
    optAccuracy = prc.optimalAccuracy();
    optConfWeightedAccuracy = prc.optimalCwa();
    logLikelihood = prc.logLikelihood();
    accrecall = prc.cwaArray();
    optaccrecall = prc.optimalCwaArray();

    return accuracy;
  }
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 23, Source: AccuracyStats.java

Example 3: trainClassifier

import edu.stanford.nlp.ling.Datum; // import the required package/class
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) {
  Minimizer<DiffFunction> minimizer = getMinimizer();
  Index<F> featureIndex = Generics.newIndex();
  Index<L> labelIndex = Generics.newIndex();
  for (Datum<L, F> d : dataIterable) {
    labelIndex.add(d.label());
    featureIndex.addAll(d.asFeatures());//If there are duplicates, it doesn't add them again.
  }
  System.err.println(String.format("Training linear classifier with %d features and %d labels", featureIndex.size(), labelIndex.size()));

  LogConditionalObjectiveFunction<L, F> objective = new LogConditionalObjectiveFunction<L, F>(dataIterable, logPrior, featureIndex, labelIndex);
  objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC));

  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(objective, TOL, initial);

  LinearClassifier<L, F> classifier = new LinearClassifier<L, F>(objective.to2D(weights), featureIndex, labelIndex);
  return classifier;
}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 20, Source: LinearClassifierFactory.java

Example 4: updateDerivative

import edu.stanford.nlp.ling.Datum; // import the required package/class
private void updateDerivative(Datum<L,F> datum, double[] probs,Counter<Triple<Integer,Integer,Integer>> feature2classPairDerivatives){
  for (F feature : datum.asFeatures()) {
    int fID = labeledDataset.featureIndex.indexOf(feature);
    if (fID >= 0) {
      for (int c = 0; c < numClasses; c++) {
        for (int cPrime = 0; cPrime < numClasses; cPrime++) {
          if (cPrime == c) {
            feature2classPairDerivatives.incrementCount(new Triple<Integer,Integer,Integer>(fID,c,cPrime), - probs[c]*(1-probs[c])*valueOfFeature(feature,datum));
          } else {
            feature2classPairDerivatives.incrementCount(new Triple<Integer,Integer,Integer>(fID,c,cPrime), probs[c]*probs[cPrime]*valueOfFeature(feature,datum));
          }
        }
      }
    }
  }
}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 17, Source: GeneralizedExpectationObjectiveFunction.java

Example 5: mapDataset

import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
  * Maps the labels of the given dataset onto a new label set, reusing this dataset's
  * feature index.
  *
  * @param dataset the dataset to convert
  * @return a new GeneralDataset whose features and ids map exactly to those of this
  *         GeneralDataset, but whose labels are converted to the new label set
  */
 public <L2> GeneralDataset<L2,F> mapDataset(GeneralDataset<L,F> dataset, Index<L2> newLabelIndex, Map<L,L2> labelMapping, L2 defaultLabel)
{
   GeneralDataset<L2,F> newDataset;
   if(dataset instanceof RVFDataset)
     newDataset = new RVFDataset<L2,F>(this.featureIndex, newLabelIndex);
   else newDataset = new Dataset<L2,F>(this.featureIndex, newLabelIndex);
   this.featureIndex.lock();
   this.labelIndex.lock();
   //System.out.println("inside mapDataset: dataset size:"+dataset.size());
   for(int i = 0; i < dataset.size(); i++)  {
     //System.out.println("inside mapDataset: adding datum number"+i);
     Datum<L,F> d = dataset.getDatum(i);
     Datum<L2,F> d2 = mapDatum(d, labelMapping, defaultLabel);
     newDataset.add(d2);
   }
   //System.out.println("old Dataset stats: numData:"+dataset.size()+" numfeatures:"+dataset.featureIndex().size()+" numlabels:"+dataset.labelIndex.size());
   //System.out.println("new Dataset stats: numData:"+newDataset.size()+" numfeatures:"+newDataset.featureIndex().size()+" numlabels:"+newDataset.labelIndex.size());
   //System.out.println("this dataset stats: numData:"+size()+" numfeatures:"+featureIndex().size()+" numlabels:"+labelIndex.size());

   this.featureIndex.unlock();
   this.labelIndex.unlock();
   return newDataset;
 }
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 29, Source: GeneralDataset.java

Example 6: scoresOf

import edu.stanford.nlp.ling.Datum; // import the required package/class
/** Construct a counter with keys the labels of the classifier and
 *  values the score (unnormalized log probability) of each class.
 */
@Override
public Counter<L> scoresOf(Datum<L, F> example) {
  if(example instanceof RVFDatum<?, ?>)return scoresOfRVFDatum((RVFDatum<L,F>)example);
  Collection<F> feats = example.asFeatures();
  int[] features = new int[feats.size()];
  int i = 0;
  for (F f : feats) {
    int index = featureIndex.indexOf(f);
    if (index >= 0) {
      features[i++] = index;
    } else {
      //System.err.println("FEATURE LESS THAN ZERO: " + f);
    }
  }
  int[] activeFeatures = new int[i];
  System.arraycopy(features, 0, activeFeatures, 0, i);
  Counter<L> scores = new ClassicCounter<L>();
  for (L lab : labels()) {
    scores.setCount(lab, scoreOf(activeFeatures, lab));
  }
  return scores;
}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 26, Source: LinearClassifier.java

Example 7: svmLightLineToDatum

import edu.stanford.nlp.ling.Datum; // import the required package/class
public static Datum<String, String> svmLightLineToDatum(String l) {
  line1++;
  l = l.replaceAll("#.*", ""); // remove any trailing comments
  String[] line = l.split("\\s+");
  Collection<String> features = new ArrayList<String>();
  for (int i = 1; i < line.length; i++) {
    String[] f = line[i].split(":");
    if (f.length != 2) {
      System.err.println("Dataset error: line " + line1);
    }
    int val = (int) Double.parseDouble(f[1]);
    for (int j = 0; j < val; j++) {
      features.add(f[0]);
    }
  }
  features.add(String.valueOf(Integer.MAX_VALUE));  // a constant feature for a class
  Datum<String, String> d = new BasicDatum<String, String>(features, line[0]);
  return d;
}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 20, Source: Dataset.java
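
For reference, a small sketch of what this parser produces for a single SVM-light-style line; the line content itself is invented.

import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.ling.Datum;

public class SvmLightSketch {
  public static void main(String[] args) {
    // Each "id:count" pair yields `count` copies of the boolean feature `id`,
    // plus one constant feature; the leading token becomes the label.
    String line = "+1 12:2 37:1 95:3 # trailing comment is stripped";
    Datum<String, String> d = Dataset.svmLightLineToDatum(line);
    System.out.println(d.label());      // +1
    System.out.println(d.asFeatures()); // [12, 12, 37, 95, 95, 95, 2147483647]
  }
}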

Example 8: main

import edu.stanford.nlp.ling.Datum; // import the required package/class
public static void main(String[] args) throws Exception {
  ColumnDataClassifier columnDataClassifier = new ColumnDataClassifier("examples/cheese2007.prop");
  Classifier<String,String> classifier =
      columnDataClassifier.makeClassifier(columnDataClassifier.readTrainingExamples("examples/cheeseDisease.train"));
  for (String line : ObjectBank.getLineIterator("examples/cheeseDisease.test", "utf-8")) {
    Datum<String,String> d = columnDataClassifier.makeDatumFromLine(line);
    System.out.println(line + "  ==>  " + classifier.classOf(d));
  }
}
 
Developer: PacktPublishing, Project: Java-Data-Science-Cookbook, Lines: 10, Source: StanfordClassifier.java

Example 9: trainMultinomialClassifier

import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Train a multinomial classifier off of the provided dataset.
 *
 * @param dataset The dataset to train the classifier off of.
 * @return A classifier.
 */
public static Classifier<String, String> trainMultinomialClassifier(
        GeneralDataset<String, String> dataset,
        int featureThreshold,
        double sigma) {
    // Set up the dataset and factory
    log.info("Applying feature threshold (" + featureThreshold + ")...");
    dataset.applyFeatureCountThreshold(featureThreshold);
    log.info("Randomizing dataset...");
    dataset.randomize(42l);
    log.info("Creating factory...");
    LinearClassifierFactory<String, String> factory = initFactory(sigma);

    // Train the final classifier
    log.info("BEGIN training");
    LinearClassifier<String, String> classifier = factory.trainClassifier(dataset);
    log.info("END training");

    // Debug
    Accuracy trainAccuracy = new Accuracy();
    for (Datum<String, String> datum : dataset) {
        String guess = classifier.classOf(datum);
        trainAccuracy.predict(Collections.singleton(guess), Collections.singleton(datum.label()));
    }
    log.info("Training accuracy:");
    log.info(trainAccuracy.toString());
    log.info("");

    // Return the classifier
    return classifier;
}
 
Developer: intel-analytics, Project: InformationExtraction, Lines: 37, Source: KBPStatisticalExtractor.java
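
A hedged sketch of feeding BasicDatum instances into this method, assuming KBPStatisticalExtractor is available in the same package or on the classpath; the features, labels, feature threshold, and sigma are invented for illustration.

import java.util.Arrays;
import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.ling.BasicDatum;

public class KBPTrainingSketch {
  public static void main(String[] args) {
    // Two invented relation instances; real KBP features are far richer.
    Dataset<String, String> dataset = new Dataset<String, String>();
    dataset.add(new BasicDatum<String, String>(
        Arrays.asList("ner=PERSON", "word=married"), "per:spouse"));
    dataset.add(new BasicDatum<String, String>(
        Arrays.asList("ner=ORGANIZATION", "word=works"), "per:employee_of"));

    // Feature-count threshold of 1 and sigma of 1.0, chosen arbitrarily here.
    Classifier<String, String> classifier =
        KBPStatisticalExtractor.trainMultinomialClassifier(dataset, 1, 1.0);
    System.out.println(classifier.classOf(new BasicDatum<String, String>(
        Arrays.asList("ner=PERSON", "word=married"), "")));
  }
}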

Example 10: trainBasic

import edu.stanford.nlp.ling.Datum; // import the required package/class
public LinearClassifier<String, String> trainBasic(
		List<List<String>> list_features, List<String> list_labels) {
	List<Datum<String, String>> trainingData = new ArrayList<Datum<String, String>>();
	for (int i = 0; i < list_features.size(); i++) {
		List<String> features = list_features.get(i);
		String label = list_labels.get(i);
		Datum<String, String> d = new BasicDatum<String, String>(features, label);
		trainingData.add(d);
	}
	// Build a classifier factory
	LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
	// factory.setTol(tol);
	// factory.setSigma(1);
	// factory.setEpsilon(0.01);
	// factory.useQuasiNewton();
	factory.setVerbose(true);
	LinearClassifier<String, String> classifier = factory.trainClassifier(trainingData);
	// {
	// ArrayList<String> temp = new ArrayList<String>();
	// temp.add("NS=" + GREEN);
	// System.out.println(classifier.scoreOf(new BasicDatum<String,
	// String>(temp, BROKEN), BROKEN));
	// }

	this.classifier = classifier;
	return classifier;
}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 28, Source: StanfordRegression.java

Example 11: scoreOf

import edu.stanford.nlp.ling.Datum; // import the required package/class
public Map<String, Double> scoreOf(List<String> features) {
	Datum<String, String> d = new BasicDatum<String, String>(features, "");
	HashMap<String, Double> label2score = new HashMap<String, Double>();
	Counter<String> c = classifier.scoresOf(d);
	for (String label : c.keySet()) {
		label2score.put(label, c.getCount(label));
	}
	return label2score;
}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 10, Source: StanfordRegression.java
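
Continuing the hypothetical StanfordRegression sketch from Example 1, a trained instance could be queried like this (the feature strings are invented and `regression` is the instance trained earlier):

// Fragment: assumes `regression` has already been trained via trainBasic or trainRVF.
List<String> features = Arrays.asList("NS=GREEN", "len=3");
Map<String, Double> label2score = regression.scoreOf(features);
for (Map.Entry<String, Double> e : label2score.entrySet()) {
  System.out.println(e.getKey() + " -> " + e.getValue());
}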

Example 12: initMC

import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> void initMC(ProbabilisticClassifier<L,F> classifier, GeneralDataset<L,F> data) {
  //if (!(gData instanceof Dataset)) {
  //  throw new UnsupportedOperationException("Can only handle Datasets, not "+gData.getClass().getName());
  //}
  //
  //Dataset data = (Dataset)gData;

  PriorityQueue<Pair<Integer, Pair<Double, Boolean>>> q = new BinaryHeapPriorityQueue<Pair<Integer, Pair<Double, Boolean>>>();
  total = 0;
  correct = 0;
  logLikelihood = 0.0;
  for (int i = 0; i < data.size(); i++) {
    Datum<L,F> d = data.getRVFDatum(i);
    Counter<L> scores = classifier.logProbabilityOf(d);
    L guess = Counters.argmax(scores);
    L correctLab = d.label();
    double guessScore = scores.getCount(guess);
    double correctScore = scores.getCount(correctLab);
    int guessInd = data.labelIndex().indexOf(guess);
    int correctInd = data.labelIndex().indexOf(correctLab);

    total++;
    if (guessInd == correctInd) {
      correct++;
    }
    logLikelihood += correctScore;
    q.add(new Pair<Integer, Pair<Double, Boolean>>(Integer.valueOf(i), new Pair<Double, Boolean>(new Double(guessScore), Boolean.valueOf(guessInd == correctInd))), -guessScore);
  }
  accuracy = (double) correct / (double) total;
  List<Pair<Integer, Pair<Double, Boolean>>> sorted = q.toSortedList();
  scores = new double[sorted.size()];
  isCorrect = new boolean[sorted.size()];

  for (int i = 0; i < sorted.size(); i++) {
    Pair<Double, Boolean> next = sorted.get(i).second();
    scores[i] = next.first().doubleValue();
    isCorrect[i] = next.second().booleanValue();
  }

}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 41, Source: MultiClassAccuracyStats.java

Example 13: score

import edu.stanford.nlp.ling.Datum; // import the required package/class
public <F> double score(Classifier<L,F> classifier, GeneralDataset<L,F> data) {
  setLabelIndex(data.labelIndex);
  clearCounts();
  int[] labelsArr = data.getLabelsArray();
  for (int i = 0; i < data.size(); i++) {
    Datum<L, F> d = data.getRVFDatum(i);
    L guess = classifier.classOf(d);
    addGuess(guess, labelIndex.get(labelsArr[i]));
  }
  finalizeCounts();

  return getFMeasure();
}
 
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 14, Source: MultiClassPrecisionRecallExtendedStats.java

Example 14: loglikelihood

import edu.stanford.nlp.ling.Datum; // import the required package/class
/**
 * Returns the log conditional likelihood of the given dataset.
 *
 * @return The log conditional likelihood of the given dataset.
 */
public double loglikelihood(List<IN> lineInfos) {
  double cll = 0.0;

  for (int i = 0; i < lineInfos.size(); i++) {
    Datum<String, String> d = makeDatum(lineInfos, i, featureFactory);
    Counter<String> c = classifier.logProbabilityOf(d);

    double total = Double.NEGATIVE_INFINITY;
    for (String s : c.keySet()) {
      total = SloppyMath.logAdd(total, c.getCount(s));
    }
    cll -= c.getCount(d.label()) - total;
  }
  // quadratic prior
  // HN: TODO: add other priors

  if (classifier instanceof LinearClassifier) {
    double sigmaSq = flags.sigma * flags.sigma;
    LinearClassifier<String, String> lc = (LinearClassifier<String, String>)classifier;
    for (String feature: lc.features()) {
      for (String classLabel: classIndex) {
        double w = lc.weight(feature, classLabel);
        cll += w * w / 2.0 / sigmaSq;
      }
    }
  }
  return cll;
}
 
Developer: benblamey, Project: stanford-nlp, Lines: 34, Source: CMMClassifier.java


Note: The edu.stanford.nlp.ling.Datum examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets come from open-source projects and remain the copyright of their original authors; consult the corresponding project's license before redistributing or reusing the code. Do not reproduce this compilation without permission.