当前位置: 首页>>代码示例>>Java>>正文


Java StringToWordVector类代码示例

本文整理汇总了Java中weka.filters.unsupervised.attribute.StringToWordVector的典型用法代码示例。如果您正苦于以下问题:Java StringToWordVector类的具体用法?Java StringToWordVector怎么用?Java StringToWordVector使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


StringToWordVector类属于weka.filters.unsupervised.attribute包,在下文中一共展示了StringToWordVector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: LearnNaiveBays

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a Naive Bayes text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} is marked as the class attribute. A
 * {@link FilteredClassifier} is then built that applies a {@link StringToWordVector}
 * filter in front of a {@link NaiveBayes} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnNaiveBays() throws Exception {
    final int lastAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(lastAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new NaiveBayes());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextNaiveBaysImpl.java

示例2: applyAndSaveFilters

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Applies a {@link StringToWordVector} filter to {@code loaderInstance} and saves the filter.
 *
 * <p>The filter is configured from a fixed option string, {@code loaderInstance} is replaced
 * by the filtered data, attribute 0 of the filtered data is set as the class attribute, and
 * the configured filter is handed to {@code FilterUtil.saveFilter}.
 *
 * @param pathToSaveFilter destination passed to {@code FilterUtil.saveFilter}
 * @throws ClassifierException wrapping any exception raised while filtering or saving
 */
private void applyAndSaveFilters(String pathToSaveFilter) throws ClassifierException {
    // Option string for the bag-of-words filter; parsed with Utils.splitOptions below.
    final String filterOptionSpec = "-R first-last -W 5000 -prune-rate 20.0 -T -I -N 0 -L -stemmer weka"
            + ".core.stemmers.NullStemmer -M 1 -tokenizer \"weka.core.tokenizers.WordTokenizer -delimiters \\\" \\\\r\\\\n\\\\t.,;:\\\\\\'\\\\\\\"()?!\\\"\"";

    try {
        log.info("Applying StringToWordVector filter...");

        StringToWordVector filter = new StringToWordVector();
        filter.setOptions(Utils.splitOptions(filterOptionSpec));
        filter.setInputFormat(loaderInstance);

        // From here on loaderInstance holds the filtered data.
        loaderInstance = Filter.useFilter(loaderInstance, filter);
        log.info("StringToWordVector Filter applied");

        loaderInstance.setClass(loaderInstance.attribute(0));
        FilterUtil.saveFilter(filter, pathToSaveFilter);
    } catch (Exception e) {
        log.error("Failed to apply filter.", e);
        throw new ClassifierException("Data Filtering failed.", e);
    }
}
 
开发者ID:sasinda,项目名称:OntologyBasedInormationExtractor,代码行数:22,代码来源:WekaPreProcessor.java

示例3: afterPropertiesSet

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Builds the SVM-based text classifier and stores it in {@code classifier}.
 *
 * <p>Training data is obtained from {@code svmTrainer.train()} and kept in
 * {@code trainingData}. A {@link StringToWordVector} filter is created for that
 * data via {@code createFilter}, and a {@link FilteredClassifier} combining the
 * filter with a {@link LibSVM} learner (probability estimates enabled) is built
 * on the training data.
 *
 * <p>NOTE(review): earlier documentation described this as a 3-gram classifier;
 * the n-gram configuration is not visible in this method (presumably it lives
 * in {@code createFilter}) -- confirm.
 *
 * @throws Exception propagated from training-data loading or the Weka calls
 */
@Override
public void afterPropertiesSet() throws Exception {
	this.trainingData = svmTrainer.train();
	StringToWordVector wordVectorFilter = createFilter(this.trainingData);

	// Tag id 0 is selected for both settings; presumably the first entry of
	// each tag list -- verify against LibSVM.TAGS_KERNELTYPE / TAGS_SVMTYPE.
	final int kernelTagId = 0;
	final int svmTagId = 0;

	LibSVM learner = new LibSVM();
	learner.setKernelType(new SelectedTag(kernelTagId, LibSVM.TAGS_KERNELTYPE));
	learner.setSVMType(new SelectedTag(svmTagId, LibSVM.TAGS_SVMTYPE));
	learner.setProbabilityEstimates(true);

	// The filter is applied by the wrapper, so the raw training data is passed in.
	FilteredClassifier pipeline = new FilteredClassifier();
	pipeline.setFilter(wordVectorFilter);
	pipeline.setClassifier(learner);
	pipeline.buildClassifier(this.trainingData);

	this.classifier = pipeline;
}
 
开发者ID:venilnoronha,项目名称:movie-rating-prediction,代码行数:28,代码来源:SVMPredictorImpl.java

示例4: testSplittingAndJoining

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Round-trips the default {@link StringToWordVector} options through
 * {@code joinOptions} and {@code splitOptions} and checks that the resulting
 * array has the same length and the same element at every position.
 *
 * @see Utils#splitOptions(String)
 * @see Utils#joinOptions(String[])
 */
public void testSplittingAndJoining() {
  try {
    String[] original = new StringToWordVector().getOptions();
    String[] roundTripped = Utils.splitOptions(Utils.joinOptions(original));

    assertEquals("Same number of options", original.length, roundTripped.length);

    for (int idx = 0; idx < original.length; idx++) {
      if (original[idx].equals(roundTripped[idx])) {
        continue;
      }
      // Positions are reported 1-based in the failure message.
      fail("Option " + (idx + 1) + " differs");
    }
  } catch (Exception e) {
    fail("Exception: " + e);
  }
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:27,代码来源:UtilsTest.java

示例5: testBackQuoting

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Checks that {@code backQuoteChars} leaves plain text untouched and that
 * {@code unbackQuoteChars} restores the original string for several inputs:
 * raw control/quote characters, their already-escaped forms, and the joined
 * default options of a {@link StringToWordVector}.
 *
 * @see Utils#backQuoteChars(String)
 * @see Utils#unbackQuoteChars(String)
 */
public void testBackQuoting() {
  // Plain text must come back unchanged from quoting.
  String plain = "blahblah";
  String quotedPlain = Utils.backQuoteChars(plain);
  assertTrue("No backquoting necessary", plain.equals(quotedPlain));

  // Raw special characters.
  String rawSpecials = "\r\n\t'\"%";
  String quotedRaw = Utils.backQuoteChars(rawSpecials);
  assertTrue(">" + rawSpecials + "< restored",
      rawSpecials.equals(Utils.unbackQuoteChars(quotedRaw)));

  // The same characters, written as escape sequences.
  String escapedSpecials = "\\r\\n\\t\\'\\\"\\%";
  String quotedEscaped = Utils.backQuoteChars(escapedSpecials);
  assertTrue(">" + escapedSpecials + "< restored",
      escapedSpecials.equals(Utils.unbackQuoteChars(quotedEscaped)));

  // A realistic option string.
  String optionLine = Utils.joinOptions(new StringToWordVector().getOptions());
  String quotedOptions = Utils.backQuoteChars(optionLine);
  assertTrue(">" + optionLine + "< restored",
      optionLine.equals(Utils.unbackQuoteChars(quotedOptions)));
}
 
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:27,代码来源:UtilsTest.java

示例6: generateAndWriteFullModel

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a model on all of the given data and serializes both the model and
 * the string filter.
 *
 * <p>The instances are passed through the {@link StringToWordVector} obtained
 * from {@code getStringToWordVectorFilter}, then through {@code dumbResample}.
 * A {@link FilteredClassifier} wrapping a {@link LibSVM} (cost and gamma taken
 * from the first configured values) and the filter from {@code getRemoveFilter}
 * is built on the resampled data. The classifier and the string filter are
 * written to the configured output directory, with cost and gamma embedded in
 * the file names.
 *
 * @param instances as returned from {@link WekaUtils#createInstances(String)}
 * @throws Exception propagated from filtering, training or serialization
 */
private void generateAndWriteFullModel(Instances instances) throws Exception {
  StringToWordVector wordVectorFilter = getStringToWordVectorFilter(instances);
  Instances vectorized = Filter.useFilter(instances, wordVectorFilter);
  Instances trainingSet = dumbResample(vectorized);
  // The Remove filter is derived from the vectorized data, not the resampled copy.
  Remove attributeRemover = getRemoveFilter(vectorized);

  LibSVM svm = new LibSVM();
  svm.setCost(config.getSvmCost().get(0));
  svm.setGamma(config.getSvmGamma().get(0));

  FilteredClassifier model = new FilteredClassifier();
  model.setClassifier(svm);
  model.setFilter(attributeRemover);
  model.buildClassifier(trainingSet);

  // Both output files share the same "<cost>_gamma_<gamma>.model" tail.
  String parameterTail = config.getSvmCost().get(0) + "_gamma_" + config.getSvmGamma().get(0) + ".model";
  weka.core.SerializationHelper.write(config.getOutputDir() + "/svm_model_c_" + parameterTail, model);
  weka.core.SerializationHelper.write(config.getOutputDir() + "/string_filter_c_" + parameterTail, wordVectorFilter);
}
 
开发者ID:ag-gipp,项目名称:mathosphere,代码行数:22,代码来源:WekaLearner.java

示例7: initializeFiltersForGraphemes

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Creates a {@link MultiFilter} chaining three {@link StringToWordVector}
 * filters with the attribute name prefixes {@code current_}, {@code previous_}
 * and {@code following_}.
 *
 * <p>Each member filter uses a {@link SpaceTokenizer} and has word counts
 * switched off.
 *
 * <p>NOTE(review): the input format is set on the {@code MultiFilter} before
 * its member filters are installed; this order is kept as-is -- confirm it is
 * intended.
 *
 * @param trainingInstancesSet data whose structure is passed to {@code setInputFormat}
 * @return the configured multi-filter
 * @throws Exception propagated from the Weka filter calls
 */
private MultiFilter initializeFiltersForGraphemes(Instances trainingInstancesSet) throws Exception {
	StringToWordVector currentFilter = newGraphemeWordFilter("first", "current_");
	StringToWordVector previousFilter = newGraphemeWordFilter("first-1", "previous_");
	StringToWordVector followingFilter = newGraphemeWordFilter("first-2", "following_");

	Filter[] chain = { currentFilter, previousFilter, followingFilter };

	MultiFilter multiFilter = new MultiFilter();
	multiFilter.setInputFormat(trainingInstancesSet);
	multiFilter.setFilters(chain);
	return multiFilter;
}

/** Builds one space-tokenizing, count-free word-vector filter for the given attribute range and prefix. */
private static StringToWordVector newGraphemeWordFilter(String attributeIndices, String namePrefix) {
	StringToWordVector wordFilter = new StringToWordVector();
	wordFilter.setAttributeIndices(attributeIndices);
	wordFilter.setAttributeNamePrefix(namePrefix);
	wordFilter.setOutputWordCounts(false);
	wordFilter.setTokenizer(new SpaceTokenizer());
	return wordFilter;
}
 
开发者ID:lagodiuk,项目名称:pos-tagger,代码行数:25,代码来源:POSTagger.java

示例8: learn

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains the classifier on the loaded dataset.
 *
 * <p>Attribute 0 of {@code trainData} is set as the class attribute. A
 * {@link FilteredClassifier} is built that applies a {@link StringToWordVector}
 * filter (restricted to the last attribute) in front of a {@link NaiveBayes}
 * learner.
 *
 * <p>Failures are not propagated: a message is printed to standard output and
 * the exception itself is reported on standard error so the cause is not lost.
 */
public void learn() {
	try {
		trainData.setClassIndex(0);
		filter = new StringToWordVector();
		filter.setAttributeIndices("last");
		classifier = new FilteredClassifier();
		classifier.setFilter(filter);
		classifier.setClassifier(new NaiveBayes());
		classifier.buildClassifier(trainData);
		System.out.println("===== Training on filtered (training) dataset =====");
		// Uncomment to see the classifier
		//System.out.println(classifier);
	} catch (Exception e) {
		System.out.println("Problem found when training");
		// Surface the cause; previously the exception was discarded entirely,
		// which made training failures impossible to diagnose.
		System.err.println("Training failed: " + e);
	}
}
 
开发者ID:amplia,项目名称:weka-classifier-examples,代码行数:22,代码来源:Learner.java

示例9: LearnRandomForest

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a random-forest text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of a {@link RandomForest} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnRandomForest() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new RandomForest());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextRandomForestImpl.java

示例10: LearnKNN

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a k-nearest-neighbour text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of an {@link IBk} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnKNN() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new IBk());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextKNNImpl.java

示例11: LearnSVM

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a support-vector text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of an {@link SMO} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnSVM() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new SMO());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:12,代码来源:ExtractClassificationTextSVMImpl.java

示例12: LearnNeuralNetwork

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a neural-network text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of a {@link MultilayerPerceptron} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnNeuralNetwork() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new MultilayerPerceptron());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:11,代码来源:ExtractClassificationTextNeuralNetworkImpl.java

示例13: LearnDecisionTree

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a decision-tree text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of a {@link J48} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnDecisionTree() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new J48());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:12,代码来源:ExtractClassificationTextDecisionTreeImpl.java

示例14: LearnLogisticRegression

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Trains a logistic-regression text classifier on {@code trainedData}.
 *
 * <p>The last attribute of {@code trainedData} becomes the class attribute, and a
 * {@link FilteredClassifier} is built that runs a {@link StringToWordVector}
 * filter ahead of a {@link Logistic} learner.
 *
 * @throws Exception propagated from the underlying Weka calls
 */
@Override
public void LearnLogisticRegression() throws Exception {
    final int classAttributeIndex = trainedData.numAttributes() - 1;
    trainedData.setClassIndex(classAttributeIndex);

    filter = new StringToWordVector();
    classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new Logistic());
    classifier.buildClassifier(trainedData);
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:12,代码来源:ExtractClassificationTextLogisticRegressionImpl.java

示例15: setWordsToKeep

import weka.filters.unsupervised.attribute.StringToWordVector; //导入依赖的package包/类
/**
 * Sets the words-to-keep value on every {@link StringToWordVector} found among
 * the member filters of the model's {@link MultiFilter}.
 *
 * <p>{@code model} is cast to {@link FilteredClassifier} and its filter to
 * {@link MultiFilter}; a {@link ClassCastException} results if either cast
 * does not hold.
 *
 * @param wordsToKeep value forwarded to {@code StringToWordVector.setWordsToKeep}
 */
public void setWordsToKeep(int wordsToKeep) {
	FilteredClassifier filteredModel = (FilteredClassifier) model;
	MultiFilter filterChain = (MultiFilter) filteredModel.getFilter();

	for (Filter member : filterChain.getFilters()) {
		if (!(member instanceof StringToWordVector)) {
			continue;
		}
		((StringToWordVector) member).setWordsToKeep(wordsToKeep);
	}

	// TODO: does the filter need to be set on the classifier again?
}
 
开发者ID:michelole,项目名称:ICDClassifier,代码行数:10,代码来源:CipeClassifier.java


注:本文中的weka.filters.unsupervised.attribute.StringToWordVector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。