当前位置: 首页>>代码示例>>Java>>正文


Java OffsetConjunctions类代码示例

本文整理汇总了Java中cc.mallet.pipe.tsf.OffsetConjunctions的典型用法代码示例。如果您正苦于以下问题:Java OffsetConjunctions类的具体用法?Java OffsetConjunctions怎么用?Java OffsetConjunctions使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


OffsetConjunctions类属于cc.mallet.pipe.tsf包,在下文中一共展示了OffsetConjunctions类的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: makeSpacePredictionPipe

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Assembles the feature-extraction pipeline used by the space-prediction test.
 *
 * <p>Stages, in order: tokenise with the pattern ".", lowercase the tokens,
 * apply {@code TestMEMMTokenSequenceRemoveSpaces}, add token-text features,
 * add offset conjunctions, and finally convert the token sequence into a
 * feature-vector sequence.
 *
 * @return the assembled pipeline as a {@code SerialPipes}
 */
public static Pipe makeSpacePredictionPipe() {
  // Offset patterns handed to OffsetConjunctions: {1}, {-1, 0} and {0, 1}.
  int[][] offsets = {
    {1},
    {-1, 0},
    {0, 1},
  };

  Pipe[] stages = {
    new CharSequence2TokenSequence("."),
    new TokenSequenceLowercase(),
    new TestMEMMTokenSequenceRemoveSpaces(),
    new TokenText(),
    // NOTE(review): the leading 'true' presumably keeps the original
    // singleton features alongside the conjunctions -- confirm against
    // the OffsetConjunctions constructor.
    new OffsetConjunctions(true, offsets),
    new TokenSequence2FeatureVectorSequence(),
  };

  return new SerialPipes(stages);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TestMEMM.java

示例2: buildSerialPipes

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds the training pipeline: line grouping and target handling first,
 * then one feature pipe per requested feature name, then offset
 * conjunctions, and finally conversion to feature vectors and label
 * sequences.
 *
 * @param featureNames names resolved to pipes through {@code FeaturePipeProvider}
 * @param replacements passed unchanged to {@code TargetReplacementPipe}
 * @param conjunctions conjunction specs; each is a ';'-separated list of
 *        integer offsets in which the text "min" stands for a minus sign,
 *        e.g. {@code "min1;0"} for the offsets {-1, 0}
 * @return the assembled {@code SerialPipes}
 * @throws NumberFormatException if a conjunction element is not an integer
 */
private SerialPipes buildSerialPipes(List<String> featureNames, List<String> replacements,
        List<String> conjunctions) throws LangDetectException, IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new LineGroupString2TokenSequence());
    pipes.add(new AddTargetToLinePipe(6));
    pipes.add(new LineToTargetTextPipe());
    pipes.add(new TargetReplacementPipe(replacements));

    FeaturePipeProvider featurePipeProvider = new FeaturePipeProvider();
    for (String featureName : featureNames) {
        pipes.add(featurePipeProvider.getPipe(featureName));
    }

    // Conjunctions are added after the feature pipes and before the
    // conversion to feature vectors, as in the original ordering.
    int[][] offsetConjunctions = new int[conjunctions.size()][];
    for (int i = 0; i < offsetConjunctions.length; i++) {
        offsetConjunctions[i] = parseConjunction(conjunctions.get(i));
    }
    pipes.add(new OffsetConjunctions(offsetConjunctions));

    pipes.add(new TokenSequence2FeatureVectorSequence(false, false));
    pipes.add(new Target2LabelSequence());

    return new SerialPipes(pipes);
}

/**
 * Parses a single conjunction spec such as {@code "min1;0"} into its integer
 * offsets ({@code {-1, 0}}). Whitespace around each element is ignored so
 * that specs like {@code "min1; 0"} are accepted.
 */
private int[] parseConjunction(String spec) {
    // Literal replacement: "min" is plain text, so no regex engine is needed.
    String[] elements = spec.replace("min", "-").split(";");
    int[] offsets = new int[elements.length];
    for (int j = 0; j < elements.length; j++) {
        offsets[j] = Integer.parseInt(elements[j].trim());
    }
    return offsets;
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:34,代码来源:ReferenceExtractorTrainer.java

示例3: makeSpacePredictionPipe

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Assembles the feature-extraction pipeline used by the CRF space-prediction
 * tests: tokenise with the pattern ".", lowercase, apply
 * {@code TestCRFTokenSequenceRemoveSpaces}, add token-text features, add
 * offset conjunctions, and convert to a feature-vector sequence.
 *
 * @return the assembled pipeline as a {@code SerialPipes}
 */
private Pipe makeSpacePredictionPipe() {
	// Active conjunctions: {0}, {1} and {-1, 0}.
	//
	// History kept from the original source: the test once also used {0, 1},
	// and the wider set {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3} is disabled.
	// The original note (akm, 12/2007) says that changing the conjunction set
	// requires adjusting the likelihood and accuracy test values.
	// TODO re-enable {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}
	int[][] offsets = {
			{ 0 },
			{ 1 },
			{ -1, 0 },
	};

	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenSequenceLowercase(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenText(),
			new OffsetConjunctions(true, offsets),
			new TokenSequence2FeatureVectorSequence(),
	};

	return new SerialPipes(stages);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:TestCRF.java

示例4: testSpacePipe

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Runs the first two {@code TestCRF.data} strings through a pipeline that
 * ends in {@code PrintInputAndTarget}, captures whatever is written to
 * {@code System.out} while the instances pass through, and compares the
 * captured text with {@code spacePipeOutput}.
 */
public void testSpacePipe () {
	Pipe p = new SerialPipes(new Pipe[] {
		new CharSequence2TokenSequence("."),
		new TokenSequenceLowercase(),
		new TestCRF.TestCRFTokenSequenceRemoveSpaces (),
		new TokenText(),
		new OffsetConjunctions(false,
				new int[][] {{0},
						{1},{-1,0},{0,1},
						{-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3},
				}),
		new PrintInputAndTarget(),
	});

	// Redirect System.out into an in-memory buffer for the duration of the run.
	ByteArrayOutputStream out = new ByteArrayOutputStream ();
	PrintStream oldOut = System.out;
	PrintStream capture = new PrintStream (out);
	System.setOut (capture);
	try {
		InstanceList lst = new InstanceList (p);
		lst.addThruPipe (new ArrayIterator(new String[] { TestCRF.data[0],
				TestCRF.data[1], }));
	} finally {
		// Restore stdout even when the pipeline throws; otherwise every later
		// test in the same JVM would keep printing into this buffer.
		System.setOut (oldOut);
		capture.flush ();
	}

	assertEquals (spacePipeOutput, out.toString());
}
 
开发者ID:mimno,项目名称:Mallet,代码行数:28,代码来源:TestSpacePipe.java

示例5: createDefaultPipes

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Creates the default token-level feature pipeline.
 *
 * <p>Registration order: token text, character prefixes (2, 3), character
 * suffixes (2, 3), character n-grams (2, 3), one {@code RegexMatches} pipe
 * per {@code TextUtil} pattern, offset conjunctions for the offsets -1 and
 * +1, and conversion to a feature-vector sequence.
 *
 * @param dataAlphabet   alphabet installed as the data alphabet of the result
 * @param targetAlphabet alphabet installed as the target alphabet of the
 *                       result and passed to {@code TokenSequence2FeatureVectorSequence}
 * @return the configured {@code SerialPipes} with target processing enabled
 */
public static SerialPipes createDefaultPipes(Alphabet dataAlphabet, Alphabet targetAlphabet) {
	// Feature name paired with the regular expression that triggers it.
	// The order of this table is the order in which the pipes are registered.
	final String[][] regexFeatures = {
			{ "ALL_CAPS_REGEX", TextUtil.ALL_CAPS_REGEX },
			{ "ALPHA_NUMERIC_REGEX", TextUtil.ALPHA_NUMERIC_REGEX },
			{ "CAPS_MIX_REGEX", TextUtil.CAPS_MIX_REGEX },
			{ "EMAIL_REGEX", TextUtil.EMAIL_REGEX },
			{ "END_DASH_REGEX", TextUtil.END_DASH_REGEX },
			{ "EXP_NUMBER_REGEX", TextUtil.EXP_NUMBER_REGEX },
			{ "FLOATING_POINT_NUMBER_REGEX", TextUtil.FLOATING_POINT_NUMBER_REGEX },
			{ "FOUR_CAPS_REGEX", TextUtil.FOUR_CAPS_REGEX },
			{ "FOUR_DIGITS_REGEX", TextUtil.FOUR_DIGITS_REGEX },
			{ "HAS_DASH_REGEX", TextUtil.HAS_DASH_REGEX },
			{ "HAS_DIGIT_REGEX", TextUtil.HAS_DIGIT_REGEX },
			{ "HEX_REGEX", TextUtil.HEX_REGEX },
			{ "HTML_REGEX", TextUtil.HTML_REGEX },
			{ "IN_PARENTHESES_REGEX", TextUtil.IN_PARENTHESES_REGEX },
			{ "INIT_CAPS_ALPHA_REGEX", TextUtil.INIT_CAPS_ALPHA_REGEX },
			{ "INIT_CAPS_REGEX", TextUtil.INIT_CAPS_REGEX },
			{ "INIT_DASH_REGEX", TextUtil.INIT_DASH_REGEX },
			{ "IP_REGEX", TextUtil.IP_REGEX },
			{ "NEGATIVE_INTEGER_REGEX", TextUtil.NEGATIVE_INTEGER_REGEX },
			{ "ONE_CAP_REGEX", TextUtil.ONE_CAP_REGEX },
			{ "ONE_DIGIT_REGEX", TextUtil.ONE_DIGIT_REGEX },
			{ "POSITIVE_INTEGER_REGEX", TextUtil.POSITIVE_INTEGER_REGEX },
			{ "PUNCTUATION_REGEX", TextUtil.PUNCTUATION_REGEX },
			{ "ROMAN_NUMBER_CAPITAL_REGEX", TextUtil.ROMAN_NUMBER_CAPITAL_REGEX },
			{ "ROMAN_NUMBER_SMALL_REGEX", TextUtil.ROMAN_NUMBER_SMALL_REGEX },
			{ "SINGLE_INITIAL_REGEX", TextUtil.SINGLE_INITIAL_REGEX },
			{ "THREE_CAPS_REGEX", TextUtil.THREE_CAPS_REGEX },
			{ "THREE_DIGITS_REGEX", TextUtil.THREE_DIGITS_REGEX },
			{ "TWO_CAPS_REGEX", TextUtil.TWO_CAPS_REGEX },
			{ "TWO_DIGITS_REGEX", TextUtil.TWO_DIGITS_REGEX },
			{ "URL_REGEX", TextUtil.URL_REGEX },
			{ "YEAR_REGEX", TextUtil.YEAR_REGEX },
			{ "OBD_REGEX", TextUtil.OBD_REGEX },
			{ "ONE_QUESTION_MARK_REGEX", TextUtil.ONE_QUESTION_MARK_REGEX },
			{ "TWO_QUESTION_MARKS_REGEX", TextUtil.TWO_QUESTION_MARKS_REGEX },
			{ "THREE_QUESTION_MARKS_REGEX", TextUtil.THREE_QUESTION_MARKS_REGEX },
			{ "MULTIPLE_QUESTION_MARKS_REGEX", TextUtil.MULTIPLE_QUESTION_MARKS_REGEX },
			{ "ONE_EXCLAMATION_MARK_REGEX", TextUtil.ONE_EXCLAMATION_MARK_REGEX },
			{ "TWO_EXCLAMATION_MARKS_REGEX", TextUtil.TWO_EXCLAMATION_MARKS_REGEX },
			{ "THREE_EXCLAMATION_MARKS_REGEX", TextUtil.THREE_EXCLAMATION_MARKS_REGEX },
			{ "MULTIPLE_EXCLAMATION_MARKS_REGEX", TextUtil.MULTIPLE_EXCLAMATION_MARKS_REGEX },
			{ "QUESTION_EXCLAMATION_MARK_REGEX", TextUtil.QUESTION_EXCLAMATION_MARK_REGEX },
			{ "EXCLAMATION_QUESTION_MARK_REGEX", TextUtil.EXCLAMATION_QUESTION_MARK_REGEX },
	};

	List<Pipe> stages = new ArrayList<Pipe>();

	// Surface-form features derived from the token text itself.
	stages.add(new TokenText());
	stages.add(new TokenTextCharPrefix("PREFIX=", 2));
	stages.add(new TokenTextCharPrefix("PREFIX=", 3));
	stages.add(new TokenTextCharSuffix("SUFFIX=", 2));
	stages.add(new TokenTextCharSuffix("SUFFIX=", 3));
	stages.add(new TokenTextCharNGrams("NGRAM=", new int[] { 2, 3 }));

	// One regex-match feature per table row.
	for (String[] feature : regexFeatures) {
		String featureName = feature[0];
		String regex = feature[1];
		stages.add(new RegexMatches(featureName, Pattern.compile(regex)));
	}

	// Conjunctions with the previous (-1) and next (+1) token offsets.
	stages.add(new OffsetConjunctions(new int[][] { { -1 }, { 1 } }));
	stages.add(new TokenSequence2FeatureVectorSequence(targetAlphabet));

	SerialPipes result = new SerialPipes(stages);
	result.setDataAlphabet(dataAlphabet);
	result.setTargetAlphabet(targetAlphabet);
	result.setTargetProcessing(true);
	return result;
}
 
开发者ID:jdmp,项目名称:java-data-mining-package,代码行数:65,代码来源:MalletUtil.java

示例6: TrainCRF

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds a token-feature pipeline, loads gzipped training and testing data
 * through it, and trains a CRF with {@code CRFTrainerByLabelLikelihood},
 * reporting per-class and token accuracy on the testing instances.
 *
 * <p>NOTE(review): the trained {@code CRF} is a local variable and is not
 * stored or returned by this constructor.
 *
 * @param trainingFilename path of the gzip-compressed training file
 * @param testingFilename  path of the gzip-compressed testing file
 * @throws IOException if either file cannot be opened, read or closed
 */
public TrainCRF(String trainingFilename, String testingFilename) throws IOException {

    ArrayList<Pipe> pipes = new ArrayList<Pipe>();

    // Conjunctions with the token at offset -1 and the token at offset +1.
    int[][] conjunctions = new int[2][];
    conjunctions[0] = new int[] { -1 };
    conjunctions[1] = new int[] { 1 };

    pipes.add(new SimpleTaggerSentence2TokenSequence());
    pipes.add(new OffsetConjunctions(conjunctions));
    //pipes.add(new FeaturesInWindow("PREV-", -1, 1));
    pipes.add(new TokenTextCharSuffix("C1=", 1));
    pipes.add(new TokenTextCharSuffix("C2=", 2));
    pipes.add(new TokenTextCharSuffix("C3=", 3));
    pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
    pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
    pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
    pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*")));
    pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
    pipes.add(new TokenSequence2FeatureVectorSequence());

    Pipe pipe = new SerialPipes(pipes);

    InstanceList trainingInstances = new InstanceList(pipe);
    InstanceList testingInstances = new InstanceList(pipe);

    addGzippedLineGroups(trainingInstances, trainingFilename);
    addGzippedLineGroups(testingInstances, testingFilename);

    CRF crf = new CRF(pipe, null);
    //crf.addStatesForLabelsConnectedAsIn(trainingInstances);
    crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingInstances);
    crf.addStartState();

    // Alternative trainers previously tried here:
    //   new CRFTrainerByStochasticGradient(crf, 1.0)
    //   new CRFTrainerByL1LabelLikelihood(crf, 0.75)
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    trainer.setGaussianPriorVariance(10.0);

    //trainer.addEvaluator(new PerClassAccuracyEvaluator(trainingInstances, "training"));
    trainer.addEvaluator(new PerClassAccuracyEvaluator(testingInstances, "testing"));
    trainer.addEvaluator(new TokenAccuracyEvaluator(testingInstances, "testing"));
    trainer.train(trainingInstances);
}

/**
 * Reads a gzip-compressed file as line groups separated by lines matching
 * {@code ^\s*$} and adds them to {@code instances} through its pipe. The
 * underlying streams are always closed, including when reading fails.
 */
private static void addGzippedLineGroups(InstanceList instances, String filename) throws IOException {
    FileInputStream file = new FileInputStream(filename);
    try {
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(new GZIPInputStream(file)));
        try {
            instances.addThruPipe(new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
        } finally {
            // Closes the whole reader chain, including the gzip stream.
            reader.close();
        }
    } finally {
        // Covers the case where wrapping the file stream itself failed;
        // closing an already-closed FileInputStream has no effect.
        file.close();
    }
}
 
开发者ID:karahindiba,项目名称:WikiInfoboxExtractor,代码行数:51,代码来源:TrainCRF.java

示例7: TrainWikiCRF

import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds a token-feature pipeline, loads gzipped training and testing data
 * through it, and trains a CRF with {@code CRFTrainerByLabelLikelihood},
 * reporting per-class and token accuracy on the testing instances.
 *
 * <p>NOTE(review): the trained {@code CRF} is a local variable and is not
 * stored or returned by this constructor.
 *
 * @param trainingFilename path of the gzip-compressed training file
 * @param testingFilename  path of the gzip-compressed testing file
 * @throws IOException if either file cannot be opened, read or closed
 */
public TrainWikiCRF(String trainingFilename, String testingFilename) throws IOException {

	ArrayList<Pipe> pipes = new ArrayList<Pipe>();

	// Conjunctions with the token at offset -1 and the token at offset +1.
	int[][] conjunctions = new int[2][];
	conjunctions[0] = new int[] { -1 };
	conjunctions[1] = new int[] { 1 };

	pipes.add(new SimpleTaggerSentence2TokenSequence());
	pipes.add(new OffsetConjunctions(conjunctions));
	//pipes.add(new FeaturesInWindow("PREV-", -1, 1));
	pipes.add(new TokenTextCharSuffix("C1=", 1));
	pipes.add(new TokenTextCharSuffix("C2=", 2));
	pipes.add(new TokenTextCharSuffix("C3=", 3));
	pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
	pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
	pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
	pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*")));
	pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
	pipes.add(new TokenSequence2FeatureVectorSequence());

	Pipe pipe = new SerialPipes(pipes);

	InstanceList trainingInstances = new InstanceList(pipe);
	InstanceList testingInstances = new InstanceList(pipe);

	addGzippedLineGroups(trainingInstances, trainingFilename);
	addGzippedLineGroups(testingInstances, testingFilename);

	CRF crf = new CRF(pipe, null);
	//crf.addStatesForLabelsConnectedAsIn(trainingInstances);
	crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingInstances);
	crf.addStartState();

	// Alternative trainers previously tried here:
	//   new CRFTrainerByStochasticGradient(crf, 1.0)
	//   new CRFTrainerByL1LabelLikelihood(crf, 0.75)
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
	trainer.setGaussianPriorVariance(10.0);

	//trainer.addEvaluator(new PerClassAccuracyEvaluator(trainingInstances, "training"));
	trainer.addEvaluator(new PerClassAccuracyEvaluator(testingInstances, "testing"));
	trainer.addEvaluator(new TokenAccuracyEvaluator(testingInstances, "testing"));
	trainer.train(trainingInstances);
}

/**
 * Reads a gzip-compressed file as line groups separated by lines matching
 * {@code ^\s*$} and adds them to {@code instances} through its pipe. The
 * underlying streams are always closed, including when reading fails.
 */
private static void addGzippedLineGroups(InstanceList instances, String filename) throws IOException {
	FileInputStream file = new FileInputStream(filename);
	try {
		BufferedReader reader =
				new BufferedReader(new InputStreamReader(new GZIPInputStream(file)));
		try {
			instances.addThruPipe(new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
		} finally {
			// Closes the whole reader chain, including the gzip stream.
			reader.close();
		}
	} finally {
		// Covers the case where wrapping the file stream itself failed;
		// closing an already-closed FileInputStream has no effect.
		file.close();
	}
}
 
开发者ID:karahindiba,项目名称:WikiInfoboxExtractor,代码行数:51,代码来源:TrainWikiCRF.java


注:本文中的cc.mallet.pipe.tsf.OffsetConjunctions类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。