本文整理汇总了Java中org.cleartk.ml.Instance.getFeatures方法的典型用法代码示例。如果您正苦于以下问题：Java Instance.getFeatures方法的具体用法？Java Instance.getFeatures怎么用？Java Instance.getFeatures使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.cleartk.ml.Instance的用法示例。
在下文中一共展示了Instance.getFeatures方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: transform
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Replaces the transformable features of an instance with one centroid similarity feature.
 *
 * <p>Features for which {@code isTransformable} is false are copied through unchanged. The
 * nested features of every transformable feature are collected, converted to a name/value map
 * via {@code featuresToFeatureMap}, and compared against {@code centroidMap} with
 * {@code simFunction.distance}. The result is appended as a feature named {@code this.name}.
 *
 * @param instance the instance whose features are read; it is not modified by this method
 * @return a new instance with the same outcome and the rewritten feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> features = new ArrayList<Feature>();
  List<Feature> featuresToTransform = new ArrayList<Feature>();
  for (Feature feature : instance.getFeatures()) {
    if (this.isTransformable(feature)) {
      // Store off the nested features for the similarity computation below
      featuresToTransform.addAll(((TransformableFeature) feature).getFeatures());
    } else {
      // Pass through non-transformable features
      features.add(feature);
    }
  }

  // Create the centroid similarity feature. Double.valueOf replaces the deprecated
  // Double constructor and boxes the same value.
  Map<String, Double> featureMap = this.featuresToFeatureMap(featuresToTransform);
  features.add(new Feature(
      this.name,
      Double.valueOf(this.simFunction.distance(featureMap, centroidMap))));
  return new Instance<OUTCOME_T>(instance.getOutcome(), features);
}
示例2: createIdfMap
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Builds a new {@code IDFMap} from the given instances.
 *
 * <p>For each instance, the names of the features nested inside its transformable features are
 * gathered into a set, each distinct name is added to the map once, and the map's total document
 * count is incremented once.
 *
 * @param instances the instances to scan
 * @return the newly populated map
 */
protected IDFMap createIdfMap(Iterable<Instance<OUTCOME_T>> instances) {
  IDFMap idfMap = new IDFMap();
  for (Instance<OUTCOME_T> current : instances) {
    // A set, so that a name repeated within one instance is only added once for it
    Set<String> namesInInstance = new HashSet<String>();
    for (Feature candidate : current.getFeatures()) {
      if (!this.isTransformable(candidate)) {
        continue;
      }
      // The nested features, not the wrapper, carry the names that are counted
      for (Feature nested : ((TransformableFeature) candidate).getFeatures()) {
        namesInInstance.add(nested.getName());
      }
    }
    for (String name : namesInInstance) {
      idfMap.add(name);
    }
    idfMap.incTotalDocumentCount();
  }
  return idfMap;
}
示例3: train
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Collects mutual information statistics from the instances and ranks the feature names.
 *
 * <p>Resets {@code mutualInfoStats}, updates it with a count of 1 for every (nested feature
 * name, outcome) pair found inside transformable features, then stores in
 * {@code selectedFeatures} the feature names sorted in descending order of the score function
 * obtained for {@code combineScoreMethod}. Finally marks this object as trained.
 *
 * @param instances the training instances
 */
@Override
public void train(Iterable<Instance<OUTCOME_T>> instances) {
  // Start from fresh statistics for all features and classes
  this.mutualInfoStats = new MutualInformationStats<OUTCOME_T>(this.smoothingCount);

  for (Instance<OUTCOME_T> current : instances) {
    OUTCOME_T label = current.getOutcome();
    for (Feature candidate : current.getFeatures()) {
      if (!this.isTransformable(candidate)) {
        continue;
      }
      for (Feature nested : ((TransformableFeature) candidate).getFeatures()) {
        this.mutualInfoStats.update(this.nameFeature(nested), label, 1);
      }
    }
  }

  // Rank every observed feature name by its mutual information score, best first
  Set<String> observedNames = this.mutualInfoStats.classConditionalCounts.rowKeySet();
  this.selectedFeatures = Ordering.natural()
      .onResultOf(this.mutualInfoStats.getScoreFunction(this.combineScoreMethod))
      .reverse()
      .immutableSortedCopy(observedNames);
  this.isTrained = true;
}
示例4: write
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Writes a sequence of instances through the delegated data writer.
 *
 * <p>Before each instance is written, every outcome feature extractor is applied to the outcomes
 * of the instances already processed in this call, and the resulting features are appended to
 * the list returned by that instance's {@code getFeatures()}.
 *
 * @param instances the instances to write, in order
 * @throws CleartkProcessingException declared by this method; presumably raised by the
 *         extractors or the delegated writer (not visible here)
 * @throws IllegalStateException if no delegated data writer has been set
 */
public void write(List<Instance<OUTCOME_TYPE>> instances) throws CleartkProcessingException {
  if (this.delegatedDataWriter == null) {
    throw new IllegalStateException(
        "delegatedDataWriter must be set before calling writeSequence");
  }

  List<Object> previousOutcomes = new ArrayList<Object>();
  for (Instance<OUTCOME_TYPE> current : instances) {
    List<Feature> currentFeatures = current.getFeatures();
    for (OutcomeFeatureExtractor extractor : outcomeFeatureExtractors) {
      currentFeatures.addAll(extractor.extractFeatures(previousOutcomes));
    }
    // Record the outcome only after extraction, so extractors see earlier outcomes only
    previousOutcomes.add(current.getOutcome());
    delegatedDataWriter.write(current);
  }
}
示例5: testScript
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end check of the script-based string outcome writer, trainer and classifier.
 *
 * <p>Writes training data, verifies the first line of the training file is non-empty, trains and
 * packages a model, verifies the first line of {@code script.model} is {@code "test"} after
 * trimming, and then checks that the loaded classifier returns the encoded feature string for
 * each of 1000 generated instances.
 */
@Test
public void testScript() throws Exception {
  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      ScriptStringOutcomeDataWriter.class.getName(),
      ScriptStringOutcomeDataWriter.PARAM_SCRIPT_DIRECTORY,
      "scripts/test/"));

  // run process to produce a bunch of instances
  annotator.process(null);
  annotator.collectionProcessComplete();

  // check that the training data file was written and is not empty;
  // close the reader in finally so a failed assertion does not leak the file handle
  BufferedReader reader = new BufferedReader(
      new FileReader(new File(this.outputDirectoryName, "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    reader.close();
  }

  // run the training command
  JarClassifierBuilder.trainAndPackage(this.outputDirectory);
  reader = new BufferedReader(new FileReader(new File(this.outputDirectoryName, "script.model")));
  try {
    Assert.assertTrue(reader.readLine().trim().equals("test"));
  } finally {
    reader.close();
  }

  // read in the classifier and test it on new instances
  ScriptStringOutcomeClassifierBuilder builder = new ScriptStringOutcomeClassifierBuilder();
  ScriptStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String encoded = classifier.featuresToString(features);
    Assert.assertTrue(encoded.equals(classifier.classify(features)));
  }
}
示例6: getFeatureValues
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Renders the value of every feature of an instance as a string.
 *
 * @param instance the instance whose features are read
 * @return one entry per feature, in iteration order; the entry is {@code null} when the feature
 *         itself or its value is {@code null}, otherwise the value's {@code toString()}
 */
private List<String> getFeatureValues(Instance<String> instance) {
  List<String> rendered = new ArrayList<String>();
  for (Feature current : instance.getFeatures()) {
    String text = null;
    if (current != null) {
      Object raw = current.getValue();
      if (raw != null) {
        text = raw.toString();
      }
    }
    rendered.add(text);
  }
  return rendered;
}
示例7: computeCentroid
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Computes the mean tf*idf value per term over all instances.
 *
 * <p>For every feature nested inside a transformable feature, the feature's integer value is
 * multiplied by {@code idfs.getIDF(name)} and summed per feature name. Each sum is then divided
 * by {@code idfs.getTotalDocumentCount()}.
 *
 * <p>Note: the division is floating-point, so a document count of zero produces infinite or NaN
 * entries rather than an exception.
 *
 * @param instances the instances whose transformable features contribute to the centroid
 * @param idfs supplies the per-term IDF values and the total document count
 * @return a new map from term name to mean tf*idf
 * @throws ClassCastException if a nested feature's value is not an {@code Integer}
 */
protected Map<String, Double> computeCentroid(Iterable<Instance<OUTCOME_T>> instances, IDFMap idfs) {
  int numDocuments = idfs.getTotalDocumentCount();
  Map<String, Double> newCentroidMap = new HashMap<String, Double>();

  for (Instance<OUTCOME_T> instance : instances) {
    // Grab the matching tf*idf features from the set of all features in an instance
    for (Feature feature : instance.getFeatures()) {
      if (!this.isTransformable(feature)) {
        continue;
      }
      // tf*idf features contain a list of features; those nested features are what
      // get accumulated into the centroid
      for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) {
        String termName = untransformedFeature.getName();
        int tf = (Integer) untransformedFeature.getValue();
        double tfidf = tf * idfs.getIDF(termName);

        // Single lookup; the map never stores null, so null means "term not seen yet"
        Double previousSum = newCentroidMap.get(termName);
        double sumTfidf = (previousSum == null) ? 0.0 : previousSum.doubleValue();
        newCentroidMap.put(termName, sumTfidf + tfidf);
      }
    }
  }

  // Turn the sums into means; setValue updates the entry in place while iterating
  for (Map.Entry<String, Double> entry : newCentroidMap.entrySet()) {
    entry.setValue(entry.getValue() / numDocuments);
  }
  return newCentroidMap;
}
示例8: transform
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Produces a copy of the instance in which transformable features are expanded and transformed.
 *
 * <p>Each transformable feature is replaced by the result of applying the single-feature
 * {@code transform} to each of its nested features. All other features are kept as they are.
 *
 * @param instance the instance whose features are read
 * @return a new instance with the same outcome and the rewritten feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> rewritten = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      rewritten.add(candidate);
      continue;
    }
    for (Feature nested : ((TransformableFeature) candidate).getFeatures()) {
      rewritten.add(this.transform(nested));
    }
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), rewritten);
}
示例9: transform
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * Produces a copy of the instance in which transformable features are filtered.
 *
 * <p>Each transformable feature is replaced by those of its nested features that pass the filter
 * implemented by this object (passed as the predicate to {@code Collections2.filter}). All other
 * features are kept as they are.
 *
 * @param instance the instance whose features are read
 * @return a new instance with the same outcome and the filtered feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> kept = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      // Non-relevant features bypass the filter entirely
      kept.add(candidate);
      continue;
    }
    // Keep only the selected nested features
    kept.addAll(Collections2.filter(((TransformableFeature) candidate).getFeatures(), this));
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), kept);
}
示例10: testBinaryLIBSVM
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end check of the LIBSVM boolean outcome writer, trainer and classifier.
 *
 * <p>Writes training data, verifies the first line of the training file is non-empty, trains a
 * model, and then checks for each of 1000 generated instances that the classifier predicts the
 * instance's outcome and scores it higher than the opposite outcome.
 */
@Test
public void testBinaryLIBSVM() throws Exception {
  // create the data writer
  BooleanAnnotator annotator = new BooleanAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibSvmBooleanOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);
  annotator.collectionProcessComplete();

  // check that the output file was written and is not empty;
  // close the reader in finally so a failed assertion does not leak the file handle
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    reader.close();
  }

  // run the training command; restore output even if training throws
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "0", "-t", "0");
  } finally {
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibSvmBooleanOutcomeClassifierBuilder builder = new LibSvmBooleanOutcomeClassifierBuilder();
  LibSvmBooleanOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<Boolean> instance : ExampleInstanceFactory.generateBooleanInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    Boolean outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));
    Map<Boolean, Double> scoredOutcomes = classifier.score(features);
    Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(!outcome));
  }
}
示例11: testBooleanOutcomeLIBLINEAR
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end check of the LIBLINEAR boolean outcome writer, trainer and classifier.
 *
 * <p>Writes training data, verifies the first line of the builder's training data file is
 * non-empty, trains a model, and then checks for each of 1000 generated instances that the
 * classifier predicts the instance's outcome and scores it higher than the opposite outcome.
 */
@Test
public void testBooleanOutcomeLIBLINEAR() throws Exception {
  // create the data writer
  BooleanAnnotator annotator = new BooleanAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibLinearBooleanOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);
  annotator.collectionProcessComplete();

  // check that the output file was written and is not empty;
  // close the reader in finally so a failed assertion does not leak the file handle
  LibLinearBooleanOutcomeClassifierBuilder builder = new LibLinearBooleanOutcomeClassifierBuilder();
  BufferedReader reader = new BufferedReader(new FileReader(
      builder.getTrainingDataFile(this.outputDirectory)));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    reader.close();
  }

  // run the training command; restore output even if training throws
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "1");
  } finally {
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibLinearBooleanOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<Boolean> instance : ExampleInstanceFactory.generateBooleanInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    Boolean outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));
    Map<Boolean, Double> scoredOutcomes = classifier.score(features);
    Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(!outcome));
  }
}
示例12: testMultiClassLIBSVM
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end check of the LIBSVM string outcome writer, trainer and classifier.
 *
 * <p>Writes training data, verifies the first line of the training file is non-empty, trains a
 * model, and then checks for each of 1000 generated instances that the classifier predicts the
 * instance's outcome and scores it higher than each other outcome among "A", "B" and "C".
 */
@Test
public void testMultiClassLIBSVM() throws Exception {
  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibSvmStringOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);
  annotator.collectionProcessComplete();

  // check that the training data file was written and is not empty;
  // close the reader in finally so a failed assertion does not leak the file handle
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.libsvm")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    reader.close();
  }

  // run the training command; restore output even if training throws
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "10", "-t", "0");
  } finally {
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibSvmStringOutcomeClassifierBuilder builder = new LibSvmStringOutcomeClassifierBuilder();
  LibSvmStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));
    Map<String, Double> scoredOutcomes = classifier.score(features);
    for (String otherOutcome : Arrays.asList("A", "B", "C")) {
      if (!otherOutcome.equals(outcome)) {
        Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(otherOutcome));
      }
    }
  }
}
示例13: testStringOutcomeLIBLINEAR
import org.cleartk.ml.Instance; //导入方法依赖的package包/类
/**
 * End-to-end check of the LIBLINEAR string outcome writer, trainer and classifier.
 *
 * <p>Writes training data, verifies the first line of the training file is non-empty, trains a
 * model, and then checks for each of 1000 generated instances that the classifier predicts the
 * instance's outcome and scores it higher than each other outcome among "A", "B" and "C".
 */
@Test
public void testStringOutcomeLIBLINEAR() throws Exception {
  // create the data writer
  StringAnnotator annotator = new StringAnnotator();
  annotator.initialize(UimaContextFactory.createUimaContext(
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      this.outputDirectoryName,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      LibLinearStringOutcomeDataWriter.class.getName()));

  // run process to produce a bunch of instances
  annotator.process(null);
  annotator.collectionProcessComplete();

  // check that the training data file was written and is not empty;
  // close the reader in finally so a failed assertion does not leak the file handle
  BufferedReader reader = new BufferedReader(new FileReader(new File(
      this.outputDirectoryName,
      "training-data.liblinear")));
  try {
    Assert.assertTrue(reader.readLine().length() > 0);
  } finally {
    reader.close();
  }

  // run the training command; restore output even if training throws
  HideOutput hider = new HideOutput();
  try {
    Train.main(this.outputDirectoryName, "-c", "1.0", "-s", "0");
  } finally {
    hider.restoreOutput();
  }

  // read in the classifier and test it on new instances
  LibLinearStringOutcomeClassifierBuilder builder = new LibLinearStringOutcomeClassifierBuilder();
  LibLinearStringOutcomeClassifier classifier;
  classifier = builder.loadClassifierFromTrainingDirectory(this.outputDirectory);
  for (Instance<String> instance : ExampleInstanceFactory.generateStringInstances(1000)) {
    List<Feature> features = instance.getFeatures();
    String outcome = instance.getOutcome();
    Assert.assertEquals(outcome, classifier.classify(features));
    Map<String, Double> scoredOutcomes = classifier.score(features);
    for (String otherOutcome : Arrays.asList("A", "B", "C")) {
      if (!otherOutcome.equals(outcome)) {
        Assert.assertTrue(scoredOutcomes.get(outcome) > scoredOutcomes.get(otherOutcome));
      }
    }
  }
}