本文整理汇总了 Java 中 weka.filters.unsupervised.attribute.StringToWordVector 类的典型用法代码示例。如果您正苦于以下问题：Java StringToWordVector 类的具体用法？Java StringToWordVector 怎么用？Java StringToWordVector 使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
StringToWordVector 类属于 weka.filters.unsupervised.attribute 包，在下文中一共展示了 StringToWordVector 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: LearnNaiveBays
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link NaiveBayes} model on {@code trainedData}, with a
 * {@link StringToWordVector} filter applied through a {@link FilteredClassifier}.
 *
 * <p>The last attribute of {@code trainedData} is used as the class attribute.
 * The {@code filter} and {@code classifier} fields are replaced by fresh instances.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnNaiveBays() throws Exception {
    // The class label is the final column of the training set.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new NaiveBayes());
    classifier.buildClassifier(trainedData);
}
示例2: applyAndSaveFilters
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Runs a {@link StringToWordVector} filter over {@code loaderInstance}, replaces
 * {@code loaderInstance} with the filtered data, marks attribute 0 of the filtered
 * data as the class attribute and finally saves the filter.
 *
 * @param pathToSaveFilter location handed to {@code FilterUtil.saveFilter}
 * @throws ClassifierException wrapping any exception raised while configuring,
 *         applying or saving the filter
 */
private void applyAndSaveFilters(String pathToSaveFilter) throws ClassifierException {
    // Earlier option set (NGramTokenizer based), kept for reference:
    // String[] options = Utils.splitOptions("-R first-last -W 5000 -prune-rate 20.0 -C -N 0 " +
    // "-stemmer weka.core.stemmers.NullStemmer -M 3 -tokenizer \"weka.core.tokenizers.NGramTokenizer " +
    // "-delimiters \\\" \\\\r\\\\n\\\\t.,;:\\\\\\'\\\\\\\"()?!\\\" -max 1 -min 1\"");
    final String optionSpec =
            "-R first-last -W 5000 -prune-rate 20.0 -T -I -N 0 -L"
            + " -stemmer weka.core.stemmers.NullStemmer -M 1"
            + " -tokenizer \"weka.core.tokenizers.WordTokenizer"
            + " -delimiters \\\" \\\\r\\\\n\\\\t.,;:\\\\\\'\\\\\\\"()?!\\\"\"";

    try {
        log.info("Applying StringToWordVector filter...");

        String[] parsedOptions = Utils.splitOptions(optionSpec);
        StringToWordVector wordVectorFilter = new StringToWordVector();
        wordVectorFilter.setOptions(parsedOptions);
        wordVectorFilter.setInputFormat(loaderInstance);

        loaderInstance = Filter.useFilter(loaderInstance, wordVectorFilter);
        log.info("StringToWordVector Filter applied");

        // Attribute 0 of the filtered data becomes the class attribute.
        loaderInstance.setClass(loaderInstance.attribute(0));

        FilterUtil.saveFilter(wordVectorFilter, pathToSaveFilter);
    } catch (Exception e) {
        log.error("Failed to apply filter.", e);
        throw new ClassifierException("Data Filtering failed.", e);
    }
}
示例3: afterPropertiesSet
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Initialisation hook: obtains the training data from {@code svmTrainer} and
 * trains a {@link LibSVM} model behind a {@link StringToWordVector} filter.
 *
 * <p>The original author describes the result as a 3-gram SVM classifier trained
 * on data configured in {@link #dataConfig}; the n-gram setup presumably lives in
 * {@code createFilter} (not visible here) - confirm there.
 *
 * <p>On success {@code trainingData} holds the training set and {@code classifier}
 * holds the trained {@link FilteredClassifier}.
 *
 * @throws Exception if training-data creation, filter creation or model building fails
 */
@Override
public void afterPropertiesSet() throws Exception {
    this.trainingData = svmTrainer.train();
    StringToWordVector wordVectorFilter = createFilter(this.trainingData);

    // Tag id 0 is selected for both kernel type and SVM type; probability
    // estimates are switched on.
    // NOTE(review): id 0 presumably maps to linear kernel / C-SVC - confirm
    // against the LibSVM tag tables.
    LibSVM libSvm = new LibSVM();
    libSvm.setKernelType(new SelectedTag(0, LibSVM.TAGS_KERNELTYPE));
    libSvm.setSVMType(new SelectedTag(0, LibSVM.TAGS_SVMTYPE));
    libSvm.setProbabilityEstimates(true);

    // The filter is applied by the FilteredClassifier itself, so the raw
    // training data is passed to buildClassifier.
    FilteredClassifier pipeline = new FilteredClassifier();
    pipeline.setFilter(wordVectorFilter);
    pipeline.setClassifier(libSvm);
    pipeline.buildClassifier(this.trainingData);

    this.classifier = pipeline;
}
示例4: testSplittingAndJoining
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Verifies that joining the default {@link StringToWordVector} options into a
 * single string and splitting that string again yields the same option array.
 *
 * @see Utils#splitOptions(String)
 * @see Utils#joinOptions(String[])
 */
public void testSplittingAndJoining() {
    try {
        String[] original = new StringToWordVector().getOptions();
        String rejoined = Utils.joinOptions(original);
        String[] roundTripped = Utils.splitOptions(rejoined);

        assertEquals("Same number of options", original.length, roundTripped.length);

        // Positions are reported 1-based in the failure message.
        for (int position = 1; position <= original.length; position++) {
            String expected = original[position - 1];
            String actual = roundTripped[position - 1];
            if (!expected.equals(actual)) {
                fail("Option " + position + " differs");
            }
        }
    } catch (Exception e) {
        fail("Exception: " + e);
    }
}
示例5: testBackQuoting
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Verifies that {@code Utils.backQuoteChars} leaves plain text untouched and that
 * {@code Utils.unbackQuoteChars} restores the original string for several inputs.
 *
 * @see Utils#backQuoteChars(String)
 * @see Utils#unbackQuoteChars(String)
 */
public void testBackQuoting() {
    // Plain text: quoting must be a no-op.
    final String plain = "blahblah";
    assertTrue("No backquoting necessary", plain.equals(Utils.backQuoteChars(plain)));

    // Real control characters, quotes and a percent sign.
    final String specials = "\r\n\t'\"%";
    String specialsRestored = Utils.unbackQuoteChars(Utils.backQuoteChars(specials));
    assertTrue(">" + specials + "< restored", specials.equals(specialsRestored));

    // The same characters, already written as backslash escape sequences.
    final String escaped = "\\r\\n\\t\\'\\\"\\%";
    String escapedRestored = Utils.unbackQuoteChars(Utils.backQuoteChars(escaped));
    assertTrue(">" + escaped + "< restored", escaped.equals(escapedRestored));

    // A realistic option string taken from the filter's defaults.
    final String optionLine = Utils.joinOptions(new StringToWordVector().getOptions());
    String optionLineRestored = Utils.unbackQuoteChars(Utils.backQuoteChars(optionLine));
    assertTrue(">" + optionLine + "< restored", optionLine.equals(optionLineRestored));
}
示例6: generateAndWriteFullModel
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link LibSVM} model on all supplied data and serializes both the model
 * and the string filter into the configured output directory.
 *
 * <p>Steps: the {@link StringToWordVector} filter is applied to {@code instances};
 * the filtered data is resampled via {@code dumbResample}; a LibSVM using the first
 * configured cost and gamma values is wrapped in a {@link FilteredClassifier}
 * together with the {@link Remove} filter and built on the resampled data. Two
 * files are then written, {@code svm_model_c_<cost>_gamma_<gamma>.model} and
 * {@code string_filter_c_<cost>_gamma_<gamma>.model}.
 *
 * @param instances as returned from {@link WekaUtils#createInstances(String)}
 * @throws Exception if filtering, training or serialization fails
 */
private void generateAndWriteFullModel(Instances instances) throws Exception {
    StringToWordVector wordVectorFilter = getStringToWordVectorFilter(instances);
    Instances vectorized = Filter.useFilter(instances, wordVectorFilter);
    Instances trainingSample = dumbResample(vectorized);
    // The Remove filter is derived from the filtered data, before resampling.
    Remove attributeRemover = getRemoveFilter(vectorized);

    LibSVM outputSvm = new LibSVM();
    outputSvm.setCost(config.getSvmCost().get(0));
    outputSvm.setGamma(config.getSvmGamma().get(0));

    FilteredClassifier outputClassifier = new FilteredClassifier();
    outputClassifier.setClassifier(outputSvm);
    outputClassifier.setFilter(attributeRemover);
    outputClassifier.buildClassifier(trainingSample);

    // Both artifacts share the same "_c_<cost>_gamma_<gamma>.model" suffix.
    final String parameterSuffix =
            "_c_" + config.getSvmCost().get(0) + "_gamma_" + config.getSvmGamma().get(0) + ".model";
    weka.core.SerializationHelper.write(
            config.getOutputDir() + "/svm_model" + parameterSuffix, outputClassifier);
    weka.core.SerializationHelper.write(
            config.getOutputDir() + "/string_filter" + parameterSuffix, wordVectorFilter);
}
示例7: initializeFiltersForGraphemes
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Builds a {@link MultiFilter} that chains three {@link StringToWordVector}
 * filters, producing attributes prefixed {@code current_}, {@code previous_} and
 * {@code following_} respectively.
 *
 * <p>The filter chain is installed <em>before</em> the input format is set. Weka
 * filters expect all configuration to be complete when {@code setInputFormat} is
 * called; setting the format first would register it against the MultiFilter's
 * default chain instead of the one configured here.
 *
 * <p>NOTE(review): the range strings {@code "first-1"} and {@code "first-2"} are
 * passed to Weka verbatim. Confirm they select the intended attributes once the
 * preceding filter in the chain has rewritten the attribute list.
 *
 * @param trainingInstancesSet data set whose structure is used as the input format
 * @return the configured multi-filter
 * @throws Exception if the input format cannot be set
 */
private MultiFilter initializeFiltersForGraphemes(Instances trainingInstancesSet) throws Exception {
    Filter[] chain = new Filter[] {
        newGraphemeWordVector("first", "current_"),
        newGraphemeWordVector("first-1", "previous_"),
        newGraphemeWordVector("first-2", "following_")
    };

    MultiFilter multiFilter = new MultiFilter();
    // Configure first, then announce the input format.
    multiFilter.setFilters(chain);
    multiFilter.setInputFormat(trainingInstancesSet);
    return multiFilter;
}

/**
 * Creates one StringToWordVector with word counts disabled and a
 * {@code SpaceTokenizer}, for the given attribute range and name prefix.
 */
private StringToWordVector newGraphemeWordVector(String attributeIndices, String namePrefix) {
    StringToWordVector wordVector = new StringToWordVector();
    wordVector.setAttributeIndices(attributeIndices);
    wordVector.setAttributeNamePrefix(namePrefix);
    wordVector.setOutputWordCounts(false);
    wordVector.setTokenizer(new SpaceTokenizer());
    return wordVector;
}
示例8: learn
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains the classifier on the loaded dataset.
 *
 * <p>Attribute 0 of {@code trainData} is used as the class attribute and the
 * {@link StringToWordVector} filter is restricted to the last attribute. A
 * {@link NaiveBayes} model is trained through a {@link FilteredClassifier}.
 *
 * <p>Failures are reported rather than propagated: the generic message is printed
 * to standard output as before, and the cause is now also written to standard
 * error so the reason for a failed training run is not lost.
 */
public void learn() {
    try {
        trainData.setClassIndex(0);

        filter = new StringToWordVector();
        filter.setAttributeIndices("last");

        classifier = new FilteredClassifier();
        classifier.setFilter(filter);
        classifier.setClassifier(new NaiveBayes());
        classifier.buildClassifier(trainData);

        System.out.println("===== Training on filtered (training) dataset =====");
        // Uncomment to see the classifier
        //System.out.println(classifier);
    } catch (Exception e) {
        System.out.println("Problem found when training");
        // No logger is in scope here; surface the cause instead of discarding it.
        e.printStackTrace();
    }
}
示例9: LearnRandomForest
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link RandomForest} on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnRandomForest() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new RandomForest());
    classifier.buildClassifier(trainedData);
}
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextRandomForestImpl.java
示例10: LearnKNN
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains an {@link IBk} classifier on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnKNN() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new IBk());
    classifier.buildClassifier(trainedData);
}
示例11: LearnSVM
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains an {@link SMO} classifier on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnSVM() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new SMO());
    classifier.buildClassifier(trainedData);
}
示例12: LearnNeuralNetwork
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link MultilayerPerceptron} on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnNeuralNetwork() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new MultilayerPerceptron());
    classifier.buildClassifier(trainedData);
}
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextNeuralNetworkImpl.java
示例13: LearnDecisionTree
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link J48} classifier on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnDecisionTree() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new J48());
    classifier.buildClassifier(trainedData);
}
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:12,代码来源:ExtractClassificationTextDecisionTreeImpl.java
示例14: LearnLogisticRegression
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a {@link Logistic} classifier on {@code trainedData} behind a
 * {@link StringToWordVector} filter.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute; the
 * {@code filter} and {@code classifier} fields are re-created.
 *
 * @throws Exception if building the classifier fails
 */
@Override
public void LearnLogisticRegression() throws Exception {
    // Class label = last attribute.
    final int labelColumn = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(labelColumn);

    filter = new StringToWordVector();

    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new Logistic());
    classifier.buildClassifier(trainedData);
}
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:12,代码来源:ExtractClassificationTextLogisticRegressionImpl.java
示例15: setWordsToKeep
import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Applies {@code wordsToKeep} to every {@link StringToWordVector} found in the
 * model's filter chain.
 *
 * <p>{@code model} is cast to {@link FilteredClassifier} and its filter to
 * {@link MultiFilter}; other filter types in the chain are left untouched.
 *
 * @param wordsToKeep value forwarded to {@code StringToWordVector.setWordsToKeep}
 */
public void setWordsToKeep(int wordsToKeep) {
    FilteredClassifier filteredModel = (FilteredClassifier) model;
    MultiFilter chain = (MultiFilter) filteredModel.getFilter();
    Filter[] members = chain.getFilters();

    for (int idx = 0; idx < members.length; idx++) {
        Filter member = members[idx];
        if (!(member instanceof StringToWordVector)) {
            continue;
        }
        ((StringToWordVector) member).setWordsToKeep(wordsToKeep);
    }
    // TODO Does the filter need to be set on the classifier again?
}