本文整理汇总了 Java 中 cc.mallet.pipe.tsf.OffsetConjunctions 类的典型用法代码示例。如果您正苦于以下问题：Java OffsetConjunctions 类的具体用法？Java OffsetConjunctions 怎么用？Java OffsetConjunctions 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
OffsetConjunctions 类属于 cc.mallet.pipe.tsf 包，在下文中一共展示了 OffsetConjunctions 类的 7 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: makeSpacePredictionPipe
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds the serial pipe used for the space-prediction data.
 *
 * <p>Stages, in order: tokenize with the pattern {@code "."}, lowercase the
 * tokens, apply {@code TestMEMMTokenSequenceRemoveSpaces}, add token-text
 * features, add offset conjunctions, and convert the token sequence to a
 * feature-vector sequence.
 *
 * @return a new {@code SerialPipes} wrapping the stages above
 */
public static Pipe makeSpacePredictionPipe ()
{
    // Each inner array is one conjunction of token offsets.
    // Larger windows (e.g. {-2, -1, 0}, {0, 1, 2}) are intentionally not enabled here.
    int[][] conjunctionOffsets = {
        {1}, {-1, 0}, {0, 1},
    };

    // The leading boolean is passed through unchanged
    // (NOTE(review): in MALLET this looks like the "include original singletons" flag — confirm).
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        // A PrintInputAndTarget stage can be inserted here when debugging.
        new TokenSequence2FeatureVectorSequence()
    };
    return new SerialPipes(stages);
}
示例2: buildSerialPipes
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Assembles the serial pipe: line grouping, target extraction and replacement,
 * the requested feature pipes, offset conjunctions, and conversion to
 * feature-vector / label sequences.
 *
 * @param featureNames names resolved to pipes through {@code FeaturePipeProvider#getPipe}
 * @param replacements passed to {@code TargetReplacementPipe}
 * @param conjunctions one entry per conjunction; offsets are separated by {@code ';'}
 *                     and the token {@code "min"} stands for a minus sign
 *                     (for example {@code "min1;0"} yields {@code {-1, 0}})
 * @return the assembled pipe
 * @throws NumberFormatException if an offset is not a valid integer
 */
private SerialPipes buildSerialPipes(List<String> featureNames, List<String> replacements,
        List<String> conjunctions) throws LangDetectException, IOException {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new LineGroupString2TokenSequence());
    pipes.add(new AddTargetToLinePipe(6));
    pipes.add(new LineToTargetTextPipe());
    pipes.add(new TargetReplacementPipe(replacements));

    FeaturePipeProvider featurePipeProvider = new FeaturePipeProvider();
    for (String featureName : featureNames) {
        pipes.add(featurePipeProvider.getPipe(featureName));
    }

    pipes.add(new OffsetConjunctions(parseOffsetConjunctions(conjunctions)));
    pipes.add(new TokenSequence2FeatureVectorSequence(false, false));
    pipes.add(new Target2LabelSequence());
    // A PrintInputAndTarget stage can be appended here when debugging.
    return new SerialPipes(pipes);
}

/**
 * Parses textual conjunction specifications into the {@code int[][]} form
 * expected by {@code OffsetConjunctions}.
 */
private static int[][] parseOffsetConjunctions(List<String> conjunctions) {
    int[][] offsets = new int[conjunctions.size()][];
    for (int i = 0; i < offsets.length; i++) {
        // Literal replacement: "min" is a plain token, no regex is needed.
        String[] elements = conjunctions.get(i).replace("min", "-").split(";");
        int[] row = new int[elements.length];
        for (int j = 0; j < elements.length; j++) {
            // trim() tolerates specs written with spaces, e.g. "-1; 0".
            row[j] = Integer.parseInt(elements[j].trim());
        }
        offsets[i] = row;
    }
    return offsets;
}
示例3: makeSpacePredictionPipe
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Builds the serial pipe used for the space-prediction data in the CRF test.
 *
 * <p>Stages, in order: tokenize with the pattern {@code "."}, lowercase the
 * tokens, apply {@code TestCRFTokenSequenceRemoveSpaces}, add token-text
 * features, add offset conjunctions, and convert the token sequence to a
 * feature-vector sequence.
 *
 * @return a new {@code SerialPipes} wrapping the stages above
 */
private Pipe makeSpacePredictionPipe() {
    // Each inner array is one conjunction of token offsets.
    // The original test also used {0, 1} and the three-token windows
    // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}. They are left out so the
    // test runs faster; per the maintainer note (akm 12/2007), changing this set
    // means the likelihood and accuracy test values must be adjusted too.
    // TODO: re-enable the three-token windows.
    int[][] conjunctionOffsets = { { 0 }, { 1 }, { -1, 0 } };

    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        // A PrintInputAndTarget stage can be inserted here when debugging.
        new TokenSequence2FeatureVectorSequence()
    };
    return new SerialPipes(stages);
}
示例4: testSpacePipe
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Runs the first two {@code TestCRF.data} strings through the space pipe,
 * captures what {@code PrintInputAndTarget} writes to {@code System.out},
 * and compares it with {@code spacePipeOutput}.
 */
public void testSpacePipe () {
    Pipe p = new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRF.TestCRFTokenSequenceRemoveSpaces (),
        new TokenText(),
        new OffsetConjunctions(false,
            new int[][] {{0},
                {1},{-1,0},{0,1},
                {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3},
            }),
        new PrintInputAndTarget(),
    });

    // Redirect System.out into a buffer for the duration of the pipe run.
    ByteArrayOutputStream out = new ByteArrayOutputStream ();
    PrintStream oldOut = System.out;
    PrintStream captured = new PrintStream (out);
    System.setOut (captured);
    try {
        InstanceList lst = new InstanceList (p);
        lst.addThruPipe (new ArrayIterator(new String[] { TestCRF.data[0],
            TestCRF.data[1], }));
        captured.flush ();
    } finally {
        // Always restore the real stream, even if the pipe throws; otherwise
        // every later test would keep writing into this buffer.
        System.setOut (oldOut);
    }

    assertEquals (spacePipeOutput, out.toString());
}
示例5: createDefaultPipes
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
/**
 * Creates the default feature-extraction pipeline: token text, character
 * prefixes/suffixes of length 2 and 3, character 2- and 3-grams, one
 * {@code RegexMatches} feature per {@code TextUtil} pattern, offset
 * conjunctions for the previous and next token, and conversion to a
 * feature-vector sequence.
 *
 * @param dataAlphabet   set as the data alphabet of the returned pipe
 * @param targetAlphabet set as the target alphabet of the returned pipe and
 *                       passed to {@code TokenSequence2FeatureVectorSequence}
 * @return the configured pipe, with target processing enabled
 */
public static SerialPipes createDefaultPipes(Alphabet dataAlphabet, Alphabet targetAlphabet) {
    // Feature name -> regular expression, in registration order.
    String[][] regexFeatures = {
        { "ALL_CAPS_REGEX", TextUtil.ALL_CAPS_REGEX },
        { "ALPHA_NUMERIC_REGEX", TextUtil.ALPHA_NUMERIC_REGEX },
        { "CAPS_MIX_REGEX", TextUtil.CAPS_MIX_REGEX },
        { "EMAIL_REGEX", TextUtil.EMAIL_REGEX },
        { "END_DASH_REGEX", TextUtil.END_DASH_REGEX },
        { "EXP_NUMBER_REGEX", TextUtil.EXP_NUMBER_REGEX },
        { "FLOATING_POINT_NUMBER_REGEX", TextUtil.FLOATING_POINT_NUMBER_REGEX },
        { "FOUR_CAPS_REGEX", TextUtil.FOUR_CAPS_REGEX },
        { "FOUR_DIGITS_REGEX", TextUtil.FOUR_DIGITS_REGEX },
        { "HAS_DASH_REGEX", TextUtil.HAS_DASH_REGEX },
        { "HAS_DIGIT_REGEX", TextUtil.HAS_DIGIT_REGEX },
        { "HEX_REGEX", TextUtil.HEX_REGEX },
        { "HTML_REGEX", TextUtil.HTML_REGEX },
        { "IN_PARENTHESES_REGEX", TextUtil.IN_PARENTHESES_REGEX },
        { "INIT_CAPS_ALPHA_REGEX", TextUtil.INIT_CAPS_ALPHA_REGEX },
        { "INIT_CAPS_REGEX", TextUtil.INIT_CAPS_REGEX },
        { "INIT_DASH_REGEX", TextUtil.INIT_DASH_REGEX },
        { "IP_REGEX", TextUtil.IP_REGEX },
        { "NEGATIVE_INTEGER_REGEX", TextUtil.NEGATIVE_INTEGER_REGEX },
        { "ONE_CAP_REGEX", TextUtil.ONE_CAP_REGEX },
        { "ONE_DIGIT_REGEX", TextUtil.ONE_DIGIT_REGEX },
        { "POSITIVE_INTEGER_REGEX", TextUtil.POSITIVE_INTEGER_REGEX },
        { "PUNCTUATION_REGEX", TextUtil.PUNCTUATION_REGEX },
        { "ROMAN_NUMBER_CAPITAL_REGEX", TextUtil.ROMAN_NUMBER_CAPITAL_REGEX },
        { "ROMAN_NUMBER_SMALL_REGEX", TextUtil.ROMAN_NUMBER_SMALL_REGEX },
        { "SINGLE_INITIAL_REGEX", TextUtil.SINGLE_INITIAL_REGEX },
        { "THREE_CAPS_REGEX", TextUtil.THREE_CAPS_REGEX },
        { "THREE_DIGITS_REGEX", TextUtil.THREE_DIGITS_REGEX },
        { "TWO_CAPS_REGEX", TextUtil.TWO_CAPS_REGEX },
        { "TWO_DIGITS_REGEX", TextUtil.TWO_DIGITS_REGEX },
        { "URL_REGEX", TextUtil.URL_REGEX },
        { "YEAR_REGEX", TextUtil.YEAR_REGEX },
        { "OBD_REGEX", TextUtil.OBD_REGEX },
        { "ONE_QUESTION_MARK_REGEX", TextUtil.ONE_QUESTION_MARK_REGEX },
        { "TWO_QUESTION_MARKS_REGEX", TextUtil.TWO_QUESTION_MARKS_REGEX },
        { "THREE_QUESTION_MARKS_REGEX", TextUtil.THREE_QUESTION_MARKS_REGEX },
        { "MULTIPLE_QUESTION_MARKS_REGEX", TextUtil.MULTIPLE_QUESTION_MARKS_REGEX },
        { "ONE_EXCLAMATION_MARK_REGEX", TextUtil.ONE_EXCLAMATION_MARK_REGEX },
        { "TWO_EXCLAMATION_MARKS_REGEX", TextUtil.TWO_EXCLAMATION_MARKS_REGEX },
        { "THREE_EXCLAMATION_MARKS_REGEX", TextUtil.THREE_EXCLAMATION_MARKS_REGEX },
        { "MULTIPLE_EXCLAMATION_MARKS_REGEX", TextUtil.MULTIPLE_EXCLAMATION_MARKS_REGEX },
        { "QUESTION_EXCLAMATION_MARK_REGEX", TextUtil.QUESTION_EXCLAMATION_MARK_REGEX },
        { "EXCLAMATION_QUESTION_MARK_REGEX", TextUtil.EXCLAMATION_QUESTION_MARK_REGEX },
    };

    List<Pipe> pipes = new ArrayList<Pipe>();

    // Surface-form features.
    pipes.add(new TokenText());
    pipes.add(new TokenTextCharPrefix("PREFIX=", 2));
    pipes.add(new TokenTextCharPrefix("PREFIX=", 3));
    pipes.add(new TokenTextCharSuffix("SUFFIX=", 2));
    pipes.add(new TokenTextCharSuffix("SUFFIX=", 3));
    pipes.add(new TokenTextCharNGrams("NGRAM=", new int[] { 2, 3 }));

    // One RegexMatches pipe per table row.
    for (String[] feature : regexFeatures) {
        pipes.add(new RegexMatches(feature[0], Pattern.compile(feature[1])));
    }

    // Conjunctions with the previous (-1) and next (+1) token.
    pipes.add(new OffsetConjunctions(new int[][] { { -1 }, { 1 } }));
    // NOTE(review): targetAlphabet is handed to the feature-vector pipe here;
    // confirm this is intended rather than dataAlphabet.
    pipes.add(new TokenSequence2FeatureVectorSequence(targetAlphabet));

    SerialPipes serialPipes = new SerialPipes(pipes);
    serialPipes.setDataAlphabet(dataAlphabet);
    serialPipes.setTargetAlphabet(targetAlphabet);
    serialPipes.setTargetProcessing(true);
    return serialPipes;
}
示例6: TrainCRF
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
public TrainCRF(String trainingFilename, String testingFilename) throws IOException {
ArrayList<Pipe> pipes = new ArrayList<Pipe>();
int[][] conjunctions = new int[2][];
conjunctions[0] = new int[] { -1 };
conjunctions[1] = new int[] { 1 };
pipes.add(new SimpleTaggerSentence2TokenSequence());
pipes.add(new OffsetConjunctions(conjunctions));
//pipes.add(new FeaturesInWindow("PREV-", -1, 1));
pipes.add(new TokenTextCharSuffix("C1=", 1));
pipes.add(new TokenTextCharSuffix("C2=", 2));
pipes.add(new TokenTextCharSuffix("C3=", 3));
pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*")));
pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
pipes.add(new TokenSequence2FeatureVectorSequence());
Pipe pipe = new SerialPipes(pipes);
InstanceList trainingInstances = new InstanceList(pipe);
InstanceList testingInstances = new InstanceList(pipe);
trainingInstances.addThruPipe(new LineGroupIterator(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(trainingFilename)))), Pattern.compile("^\\s*$"), true));
testingInstances.addThruPipe(new LineGroupIterator(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(testingFilename)))), Pattern.compile("^\\s*$"), true));
CRF crf = new CRF(pipe, null);
//crf.addStatesForLabelsConnectedAsIn(trainingInstances);
crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingInstances);
crf.addStartState();
CRFTrainerByLabelLikelihood trainer =
new CRFTrainerByLabelLikelihood(crf);
trainer.setGaussianPriorVariance(10.0);
//CRFTrainerByStochasticGradient trainer =
//new CRFTrainerByStochasticGradient(crf, 1.0);
//CRFTrainerByL1LabelLikelihood trainer =
// new CRFTrainerByL1LabelLikelihood(crf, 0.75);
//trainer.addEvaluator(new PerClassAccuracyEvaluator(trainingInstances, "training"));
trainer.addEvaluator(new PerClassAccuracyEvaluator(testingInstances, "testing"));
trainer.addEvaluator(new TokenAccuracyEvaluator(testingInstances, "testing"));
trainer.train(trainingInstances);
}
示例7: TrainWikiCRF
import cc.mallet.pipe.tsf.OffsetConjunctions; //导入依赖的package包/类
public TrainWikiCRF(String trainingFilename, String testingFilename) throws IOException {
ArrayList<Pipe> pipes = new ArrayList<Pipe>();
int[][] conjunctions = new int[2][];
conjunctions[0] = new int[] { -1 };
conjunctions[1] = new int[] { 1 };
pipes.add(new SimpleTaggerSentence2TokenSequence());
pipes.add(new OffsetConjunctions(conjunctions));
//pipes.add(new FeaturesInWindow("PREV-", -1, 1));
pipes.add(new TokenTextCharSuffix("C1=", 1));
pipes.add(new TokenTextCharSuffix("C2=", 2));
pipes.add(new TokenTextCharSuffix("C3=", 3));
pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*")));
pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
pipes.add(new TokenSequence2FeatureVectorSequence());
Pipe pipe = new SerialPipes(pipes);
InstanceList trainingInstances = new InstanceList(pipe);
InstanceList testingInstances = new InstanceList(pipe);
trainingInstances.addThruPipe(new LineGroupIterator(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(trainingFilename)))), Pattern.compile("^\\s*$"), true));
testingInstances.addThruPipe(new LineGroupIterator(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(testingFilename)))), Pattern.compile("^\\s*$"), true));
CRF crf = new CRF(pipe, null);
//crf.addStatesForLabelsConnectedAsIn(trainingInstances);
crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingInstances);
crf.addStartState();
CRFTrainerByLabelLikelihood trainer =
new CRFTrainerByLabelLikelihood(crf);
trainer.setGaussianPriorVariance(10.0);
//CRFTrainerByStochasticGradient trainer =
//new CRFTrainerByStochasticGradient(crf, 1.0);
//CRFTrainerByL1LabelLikelihood trainer =
// new CRFTrainerByL1LabelLikelihood(crf, 0.75);
//trainer.addEvaluator(new PerClassAccuracyEvaluator(trainingInstances, "training"));
trainer.addEvaluator(new PerClassAccuracyEvaluator(testingInstances, "testing"));
trainer.addEvaluator(new TokenAccuracyEvaluator(testingInstances, "testing"));
trainer.train(trainingInstances);
}