当前位置: 首页>>代码示例>>Java>>正文


Java Instance.getFeatures方法代码示例

本文整理汇总了Java中org.cleartk.ml.Instance.getFeatures方法的典型用法代码示例。如果您正苦于以下问题:Java Instance.getFeatures方法的具体用法?Java Instance.getFeatures怎么用?Java Instance.getFeatures使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.cleartk.ml.Instance的用法示例。


在下文中一共展示了Instance.getFeatures方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: transform

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Replaces the transformable features of an instance with a single centroid
 * similarity feature.
 *
 * <p>Features for which {@code isTransformable} returns false are passed through
 * unchanged. The nested features of every transformable feature are collected,
 * converted to a feature map, and compared against {@code centroidMap} using
 * {@code simFunction}; the result is appended as one feature named {@code this.name}.
 *
 * @param instance the instance whose features are to be transformed
 * @return a new instance with the same outcome and the transformed feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> features = new ArrayList<Feature>();
  List<Feature> featuresToTransform = new ArrayList<Feature>();
  for (Feature feature : instance.getFeatures()) {
    if (this.isTransformable(feature)) {
      // Store off features for later similarity computation
      featuresToTransform.addAll(((TransformableFeature) feature).getFeatures());
    } else {
      // pass through non-transformable features
      features.add(feature);
    }
  }

  // Create centroid similarity feature. Double.valueOf is used instead of the
  // deprecated Double constructor.
  Map<String, Double> featureMap = this.featuresToFeatureMap(featuresToTransform);
  features.add(new Feature(
      this.name,
      Double.valueOf(this.simFunction.distance(featureMap, centroidMap))));

  return new Instance<OUTCOME_T>(instance.getOutcome(), features);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:23,代码来源:CentroidTfidfSimilarityExtractor.java

示例2: createIdfMap

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Builds a document-frequency map from a collection of instances.
 *
 * <p>Each instance counts as one document: the distinct names of the features nested
 * inside its transformable features are each added once, and the total document
 * count is incremented once per instance.
 *
 * @param instances the instances to collect counts from
 * @return a freshly created {@code IDFMap} holding the collected counts
 */
protected IDFMap createIdfMap(Iterable<Instance<OUTCOME_T>> instances) {
  IDFMap idfMap = new IDFMap();

  for (Instance<OUTCOME_T> document : instances) {
    // A set, so that a term occurring several times in one instance is counted once
    Set<String> termNames = new HashSet<String>();

    for (Feature candidate : document.getFeatures()) {
      if (!this.isTransformable(candidate)) {
        continue;
      }
      // The nested features of a transformable feature are the actual terms
      for (Feature term : ((TransformableFeature) candidate).getFeatures()) {
        termNames.add(term.getName());
      }
    }

    for (String termName : termNames) {
      idfMap.add(termName);
    }
    idfMap.incTotalDocumentCount();
  }

  return idfMap;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:TfidfExtractor.java

示例3: train

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Collects feature/outcome statistics from the instances and ranks the feature names.
 *
 * <p>For every feature nested inside a transformable feature, the statistics are
 * updated with a count of 1 for the pair (feature name, instance outcome). The
 * feature names are then sorted in descending order of the score function selected
 * by {@code combineScoreMethod} and stored in {@code selectedFeatures}.
 *
 * @param instances the training instances
 */
@Override
public void train(Iterable<Instance<OUTCOME_T>> instances) {
  // Start from fresh statistics on every call
  this.mutualInfoStats = new MutualInformationStats<OUTCOME_T>(this.smoothingCount);

  for (Instance<OUTCOME_T> example : instances) {
    OUTCOME_T label = example.getOutcome();
    for (Feature candidate : example.getFeatures()) {
      if (!this.isTransformable(candidate)) {
        continue;
      }
      for (Feature nested : ((TransformableFeature) candidate).getFeatures()) {
        this.mutualInfoStats.update(this.nameFeature(nested), label, 1);
      }
    }
  }

  // Rank all observed feature names, highest score first
  Set<String> observedNames = this.mutualInfoStats.classConditionalCounts.rowKeySet();
  this.selectedFeatures = Ordering.natural()
      .onResultOf(this.mutualInfoStats.getScoreFunction(this.combineScoreMethod))
      .reverse()
      .immutableSortedCopy(observedNames);

  this.isTrained = true;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:MutualInformationFeatureSelectionExtractor.java

示例4: write

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Writes a sequence of instances through the delegated data writer, first adding
 * features derived from the outcomes of the preceding instances.
 *
 * <p>The outcome features are appended directly to each instance's own feature list
 * before the instance is handed to the delegate.
 *
 * @param instances the instances of one sequence, in order
 * @throws IllegalStateException if no delegated data writer has been set
 * @throws CleartkProcessingException declared by the signature; presumably raised by
 *         the delegated writer
 */
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  if (this.delegatedDataWriter == null) {
    throw new IllegalStateException("delegatedDataWriter must be set before calling writeSequence");
  }

  // Outcomes of the instances already written in this sequence
  List<Object> previousOutcomes = new ArrayList<Object>();
  for (Instance<OUTCOME_TYPE> current : instances) {
    List<Feature> currentFeatures = current.getFeatures();
    for (OutcomeFeatureExtractor extractor : outcomeFeatureExtractors) {
      currentFeatures.addAll(extractor.extractFeatures(previousOutcomes));
    }
    previousOutcomes.add(current.getOutcome());
    delegatedDataWriter.write(current);
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:ViterbiDataWriter.java

示例5: testScript

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end test of the script-based string outcome classifier: writes training
 * data, runs training, checks the produced model file and classifies new instances.
 */
@Test
public void testScript() throws Exception {

  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName, DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      ScriptStringOutcomeDataWriter.class.getName(), ScriptStringOutcomeDataWriter.PARAM_SCRIPT_DIRECTORY,
      "scripts/test/"));

  // run process to produce a bunch of instances
  annotator.process(null);

  annotator.collectionProcessComplete();

  // check that the output files were written for each class
  BufferedReader reader = new BufferedReader(
      new FileReader(new File(this.outputDirectoryName, "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    // close the file even when the assertion fails
    reader.close();
  }

  // run the training command
  JarClassifierBuilder.trainAndPackage(this.outputDirectory);

  reader = new BufferedReader(new FileReader(new File(this.outputDirectoryName, "script.model")));
  try {
    Assert.assertEquals("test", reader.readLine().trim());
  } finally {
    reader.close();
  }

  // read in the classifier and test it on new instances
  ScriptStringOutcomeClassifierBuilder builder = new ScriptStringOutcomeClassifierBuilder();
  ScriptStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String encoded = classifier.featuresToString(features);
    // assertEquals reports both values on failure, unlike assertTrue(a.equals(b))
    Assert.assertEquals(encoded, classifier.classify(features));
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:39,代码来源:ScriptTest.java

示例6: getFeatureValues

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Returns the string form of every feature value of an instance, in feature order.
 *
 * @param instance the instance whose feature values are read
 * @return one entry per feature; {@code null} where the feature itself or its value
 *         is {@code null}
 */
private List<String> getFeatureValues(Instance<String> instance) {
  List<String> rendered = new ArrayList<String>();
  for (Feature feature : instance.getFeatures()) {
    String text = null;
    if (feature != null) {
      Object raw = feature.getValue();
      if (raw != null) {
        text = raw.toString();
      }
    }
    rendered.add(text);
  }
  return rendered;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:9,代码来源:ExamplePosAnnotatorTest.java

示例7: computeCentroid

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Computes the centroid of the tf*idf vectors of all instances.
 *
 * <p>For every feature nested inside a transformable feature, the term frequency
 * (the feature's {@code Integer} value) is multiplied by the term's IDF and summed
 * per term name. Each sum is then divided by the total document count reported by
 * {@code idfs}.
 *
 * @param instances the instances contributing to the centroid
 * @param idfs supplies the per-term IDF values and the total document count
 * @return a map from term name to its mean tf*idf value
 */
protected Map<String, Double> computeCentroid(Iterable<Instance<OUTCOME_T>> instances, IDFMap idfs) {

  // Now compute centroid of all applicable terms (features) in all instances
  int numDocuments = idfs.getTotalDocumentCount();
  Map<String, Double> newCentroidMap = new HashMap<String, Double>();
  for (Instance<OUTCOME_T> instance : instances) {

    // Grab the matching tf*idf features from the set of all features in an instance
    for (Feature feature : instance.getFeatures()) {
      if (this.isTransformable(feature)) {
        // tf*idf features contain a list of features; these nested features are the
        // terms whose tf*idf values are accumulated
        for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) {
          String termName = untransformedFeature.getName();
          int tf = (Integer) untransformedFeature.getValue();
          double tfidf = tf * idfs.getIDF(termName);
          // single lookup; a missing term starts its running sum at zero
          Double previousSum = newCentroidMap.get(termName);
          double sumTfidf = (previousSum == null) ? 0.0 : previousSum;
          newCentroidMap.put(termName, sumTfidf + tfidf);
        }
      }
    }
  }

  // Turn the sums into means; setValue updates the entry in place during iteration
  for (Map.Entry<String, Double> entry : newCentroidMap.entrySet()) {
    entry.setValue(entry.getValue() / numDocuments);
  }
  return newCentroidMap;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:32,代码来源:CentroidTfidfSimilarityExtractor.java

示例8: transform

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Transforms an instance by replacing every transformable feature with the
 * individually transformed versions of its nested features.
 *
 * <p>Features that are not transformable are copied to the result unchanged, in
 * their original position.
 *
 * @param instance the instance to transform
 * @return a new instance with the same outcome and the transformed feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> result = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      result.add(candidate);
      continue;
    }
    // One output feature per nested feature
    for (Feature nested : ((TransformableFeature) candidate).getFeatures()) {
      result.add(this.transform(nested));
    }
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), result);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:15,代码来源:OneToOneTrainableExtractor_ImplBase.java

示例9: transform

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Transforms an instance by keeping only those nested features of each
 * transformable feature that are accepted by this extractor used as a filter.
 *
 * <p>Features that are not transformable are copied to the result unfiltered.
 *
 * @param instance the instance to transform
 * @return a new instance with the same outcome and the filtered feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> kept = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      // Not subject to selection: pass through as is
      kept.add(candidate);
      continue;
    }
    // Keep only the nested features that this object, acting as the predicate, accepts
    kept.addAll(Collections2.filter(((TransformableFeature) candidate).getFeatures(), this));
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), kept);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:15,代码来源:FeatureSelectionExtractor.java

示例10: testBinaryLIBSVM

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end test of the LIBSVM boolean outcome classifier: writes training data,
 * trains a model and checks classification and scoring on generated instances.
 */
@Test
public void testBinaryLIBSVM() throws Exception {
  // create the data writer
  BooleanAnnotator annotator = new BooleanAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibSvmBooleanOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);

  annotator.collectionProcessComplete();

  // check that the output file was written and is not empty
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    // close the file even when the assertion fails
    reader.close();
  }

  // run the training command
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "0", "-t", "0");
  } finally {
    // restore output even when training throws, so later output is not hidden
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibSvmBooleanOutcomeClassifierBuilder builder = new LibSvmBooleanOutcomeClassifierBuilder();
  LibSvmBooleanOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<Boolean> instance : ExampleInstanceFactory.generateBooleanInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    Boolean outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));

    // the correct outcome must score strictly higher than its negation
    Map<Boolean, Double> scoredOutcomes = classifier.score(features);
    Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(!outcome));
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:41,代码来源:LibSvmTest.java

示例11: testBooleanOutcomeLIBLINEAR

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end test of the LIBLINEAR boolean outcome classifier: writes training data,
 * trains a model and checks classification and scoring on generated instances.
 */
@Test
public void testBooleanOutcomeLIBLINEAR() throws Exception {
  // create the data writer
  BooleanAnnotator annotator = new BooleanAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibLinearBooleanOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);

  annotator.collectionProcessComplete();

  // check that the output file was written and is not empty
  LibLinearBooleanOutcomeClassifierBuilder builder = new LibLinearBooleanOutcomeClassifierBuilder();
  BufferedReader reader = new BufferedReader(new FileReader(
      builder.getTrainingDataFile(this.outputDirectory)));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    // close the file even when the assertion fails
    reader.close();
  }

  // run the training command
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "1");
  } finally {
    // restore output even when training throws, so later output is not hidden
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibLinearBooleanOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<Boolean> instance : ExampleInstanceFactory.generateBooleanInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    Boolean outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));
    // the correct outcome must score strictly higher than its negation
    Map<Boolean, Double> scoredOutcomes = classifier.score(features);
    Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(!outcome));
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:39,代码来源:LibLinearTest.java

示例12: testMultiClassLIBSVM

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end test of the LIBSVM string outcome classifier: writes training data,
 * trains a model and checks classification and scoring on generated instances.
 */
@Test
public void testMultiClassLIBSVM() throws Exception {
  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibSvmStringOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);

  annotator.collectionProcessComplete();

  // check that the output files were written for each class
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    // close the file even when the assertion fails
    reader.close();
  }

  // run the training command
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "10", "-t", "0");
  } finally {
    // restore output even when training throws, so later output is not hidden
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibSvmStringOutcomeClassifierBuilder builder = new LibSvmStringOutcomeClassifierBuilder();
  LibSvmStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));

    // the correct outcome must score strictly higher than each other outcome
    Map<String, Double> scoredOutcomes = classifier.score(features);
    for (String otherOutcome : Arrays.asList("A", "B", "C")) {
      if (!otherOutcome.equals(outcome)) {
        Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(otherOutcome));
      }
    }
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:45,代码来源:LibSvmTest.java

示例13: testStringOutcomeLIBLINEAR

import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end test of the LIBLINEAR string outcome classifier: writes training data,
 * trains a model and checks classification and scoring on generated instances.
 */
@Test
public void testStringOutcomeLIBLINEAR() throws Exception {
  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibLinearStringOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);

  annotator.collectionProcessComplete();

  // check that the output files were written for each class
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.liblinear")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    // close the file even when the assertion fails
    reader.close();
  }

  // run the training command
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "0");
  } finally {
    // restore output even when training throws, so later output is not hidden
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibLinearStringOutcomeClassifierBuilder builder = new LibLinearStringOutcomeClassifierBuilder();
  LibLinearStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));

    // the correct outcome must score strictly higher than each other outcome
    Map<String, Double> scoredOutcomes = classifier.score(features);
    for (String otherOutcome : Arrays.asList("A", "B", "C")) {
      if (!otherOutcome.equals(outcome)) {
        Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(otherOutcome));
      }
    }
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:45,代码来源:LibLinearTest.java


注:本文中的org.cleartk.ml.Instance.getFeatures方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。