本文整理汇总了Java中cc.mallet.pipe.TokenSequenceLowercase类的典型用法代码示例。如果您正苦于以下问题:Java TokenSequenceLowercase类的具体用法?Java TokenSequenceLowercase怎么用?Java TokenSequenceLowercase使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
TokenSequenceLowercase类属于cc.mallet.pipe包,在下文中一共展示了TokenSequenceLowercase类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: makeSpacePredictionPipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Builds the serial pipe used for space prediction.
 *
 * <p>The stages run in this order: tokenize with the pattern {@code "."},
 * lowercase the tokens, apply {@code TestMEMMTokenSequenceRemoveSpaces},
 * apply {@code TokenText}, add offset conjunctions, and finally convert the
 * token sequence to a feature vector sequence.
 *
 * @return a {@code SerialPipes} chaining the stages listed above
 */
public static Pipe makeSpacePredictionPipe ()
{
    // Offset groups passed to OffsetConjunctions.
    int[][] conjunctionOffsets = new int[][] {
        {1},
        {-1, 0},
        {0, 1},
    };

    return new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        new TokenSequence2FeatureVectorSequence()
    });
}
示例2: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the phone-feature pipeline.
 *
 * <p>A fresh feature {@code Alphabet} is shared by the token-sequence
 * converter and the final feature-vector stage; the label alphabet is taken
 * from the {@code Target2LabelSequence} stage that closes the pipeline.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        new SWordConverterPipe(),
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, targetAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new PhoneNeighborPipe(true, makeNeighbors()),
        new PhoneClassPipe(true, makeClassNeighbors()),
        new VowelNeighborPipe(),
        new IsFirstPipe(),
        new ThisPhoneClassPipe(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例3: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the token-feature pipeline.
 *
 * <p>Uses a newly created feature {@code Alphabet} and the target alphabet
 * of the closing {@code Target2LabelSequence} stage.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    return new SerialPipes(ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, targetAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
        targetStage
    ));
}
示例4: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the stress pipeline.
 *
 * <p>The first stage is an {@code AlignToStressPipe} built with an empty
 * list of {@code StressFeature}s; the last stage is a {@code Target2Label}
 * whose target alphabet is shared with that first stage.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2Label targetStage = new Target2Label();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new AlignToStressPipe(featureAlphabet, targetAlphabet, ImmutableList.<StressFeature>of()),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new SurroundingTokenFeature(true),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例5: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the token-feature pipeline with both
 * {@code SurroundingTokenFeature} variants ({@code false} and {@code true})
 * enabled.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    return new SerialPipes(ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, targetAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new SurroundingTokenFeature(true),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    ));
}
示例6: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the pipeline that ends in a
 * {@code LabelSequenceToLabelsAssignment} stage.
 *
 * <p>The feature alphabet and the target alphabet created here are shared
 * by the first stage and the last stage.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private static Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, targetAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
        targetStage,
        new LabelSequenceToLabelsAssignment(featureAlphabet, targetAlphabet)
    );
    return new SerialPipes(stages);
}
示例7: createInstanceList
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Creates a list of Mallet instances from a list of documents.
 *
 * <p>Each document is passed through a serial pipeline: tokenization,
 * lowercasing, stopword removal, and conversion to a feature sequence.
 *
 * @param texts a list of documents
 * @return a list of Mallet instances
 * @throws IOException declared by the signature; no statement in this body
 *         visibly throws it
 */
private InstanceList createInstanceList(List<String> texts) throws IOException
{
    List<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new CharSequence2TokenSequence());
    stages.add(new TokenSequenceLowercase());
    stages.add(new TokenSequenceRemoveStopwords());
    stages.add(new TokenSequence2FeatureSequence());

    SerialPipes pipeline = new SerialPipes(stages);
    InstanceList instances = new InstanceList(pipeline);
    instances.addThruPipe(new ArrayIterator(texts));
    return instances;
}
示例8: makeSpacePredictionPipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Builds the serial pipe used for space prediction.
 *
 * <p>The stages run in this order: tokenize with the pattern {@code "."},
 * lowercase, apply {@code TestCRFTokenSequenceRemoveSpaces}, apply
 * {@code TokenText}, add offset conjunctions, and convert to a feature
 * vector sequence.
 *
 * @return a {@code SerialPipes} chaining the stages listed above
 */
private Pipe makeSpacePredictionPipe() {
    // Only these three offset groups are active. According to the original
    // author's notes (akm 12/2007), the original test also used {0, 1} and
    // the wider windows {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}; changing
    // the active set requires adjusting the likelihood and accuracy test
    // values.
    // TODO (carried over from the original): re-enable the wider windows.
    int[][] conjunctionOffsets = new int[][] {
        { 0 },
        { 1 },
        { -1, 0 },
    };

    return new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        new TokenSequence2FeatureVectorSequence()
    });
}
示例9: createInstanceList
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Creates a list of Mallet instances from a list of documents.
 *
 * <p>Documents are tokenized with a custom pattern that accepts runs of
 * ASCII letters and digits, the punctuation characters
 * {@code - ~ _ . / < >}, and the German letters ä ü ö Ä Ö Ü ß. Tokens are
 * then lowercased and converted to a feature sequence. Stopword removal
 * ({@code TokenSequenceRemoveStopwords}) is not applied in this variant.
 *
 * @param texts a list of documents
 * @return the instances produced by passing {@code texts} through the pipeline
 * @throws IOException declared by the signature; no statement in this body
 *         visibly throws it
 */
private InstanceList createInstanceList(List<String> texts) throws IOException {
    List<Pipe> pipes = new ArrayList<Pipe>();
    // The previous pattern ended in an empty alternative ("...|<|>|)+"),
    // which let it match the empty string at every position that does not
    // start a token. Dropping that alternative keeps the same non-empty
    // matches while making every match at least one character long.
    pipes.add(new CharSequence2TokenSequence("([a-zA-Z0-9]|[-]|[~]|[_]|[.]|[/]|ä|ü|ö|Ä|Ö|Ü|ß|<|>)+"));
    pipes.add(new TokenSequenceLowercase());
    pipes.add(new TokenSequence2FeatureSequence());

    InstanceList instanceList = new InstanceList(new SerialPipes(pipes));
    instanceList.addThruPipe(new ArrayIterator(texts));
    return instanceList;
}
示例10: getPipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Builds the preprocessing pipeline.
 *
 * <p>Stages, in order: {@code Target2Label}, {@code SaveDataInSource},
 * {@code Input2CharSequence} constructed with {@code "UTF-8"}, tokenization
 * on the pattern {@code \p{Alpha}+}, lowercasing, stopword removal
 * constructed with {@code (false, false)}, and conversion to a feature
 * sequence.
 *
 * @return a {@code SerialPipes} chaining the stages listed above
 */
private Pipe getPipe() {
    Pattern tokenPattern = Pattern.compile("\\p{Alpha}+");

    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(new Target2Label());
    stages.add(new SaveDataInSource());
    stages.add(new Input2CharSequence("UTF-8"));
    stages.add(new CharSequence2TokenSequence(tokenPattern));
    stages.add(new TokenSequenceLowercase());
    stages.add(new TokenSequenceRemoveStopwords(false, false));
    stages.add(new TokenSequence2FeatureSequence());

    return new SerialPipes(stages);
}
示例11: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the syllable-feature pipeline over the supplied feature alphabet.
 *
 * <p>The target alphabet is taken from the {@code Target2LabelSequence}
 * stage that closes the pipeline.
 *
 * @param alpha the feature alphabet handed to the first stage and to the
 *              final feature-vector stage
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private SerialPipes makePipe(Alphabet alpha) {
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet targetAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new AlignmentToTokenSequence(alpha, targetAlphabet, true, true, false),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new NeighborSyllableFeature(-2, -1, 1, 2),
        new SyllCountingFeature(),
        new SyllCharRoleFeature(),
        new EndingVowelFeature(),
        new VowelWindowFeature(2, 1, "PRESYL_", -1, false),
        new VowelWindowFeature(2, 1, "PSTSYL_", 1, false),
        new SurroundingTokenFeature2(false, 1, 1),
        new SurroundingTokenFeature2(false, 2, 2),
        new SurroundingTokenFeature2(true, 3, 3),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(alpha, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例12: makePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Assembles the joint-input pipeline.
 *
 * <p>The first stage is a {@code JointInputToTokenSequence} built from a
 * fresh feature alphabet and two fresh {@code LabelAlphabet}s. The same
 * feature alphabet is reused by the final feature-vector stage.
 *
 * @return a {@code SerialPipes} running the stages in the order listed below
 */
private static Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    JointInputToTokenSequence inputStage =
        new JointInputToTokenSequence(featureAlphabet, new LabelAlphabet(), new LabelAlphabet());

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        inputStage,
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true)
    );
    return new SerialPipes(stages);
}
示例13: testThree
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Runs a {@code FileIterator} over {@code foo/bar} through a
 * text-classification pipeline and adds the results to an
 * {@code InstanceList}.
 *
 * <p>The method makes no assertions; it only exercises the pipeline.
 */
public void testThree ()
{
    Pipe[] stages = new Pipe[] {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList instances = new InstanceList (new SerialPipes(stages));

    Iterator<Instance> files =
        new FileIterator(new File("foo/bar"), null, Pattern.compile("^([^/]*)/"));
    instances.addThruPipe (files);
}
示例14: createPipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Creates the text-to-feature-vector pipeline: tokenize, lowercase, convert
 * to a feature sequence, then convert to a feature vector.
 *
 * @return a {@code SerialPipes} chaining the four stages in that order
 */
public Pipe createPipe () {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    return new SerialPipes(stages);
}
示例15: testSpacePipe
import cc.mallet.pipe.TokenSequenceLowercase; //导入依赖的package包/类
/**
 * Checks the text printed while two {@code TestCRF.data} samples pass
 * through the space pipe.
 *
 * <p>{@code System.out} is temporarily redirected to an in-memory buffer so
 * that the output can be compared against {@code spacePipeOutput}. The
 * printing is presumably done by the {@code PrintInputAndTarget} stage.
 * The original stream is restored in a {@code finally} block, so a failure
 * inside the pipe cannot leave {@code System.out} redirected for later
 * tests.
 */
public void testSpacePipe () {
    Pipe p = new SerialPipes(new Pipe[] {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRF.TestCRFTokenSequenceRemoveSpaces (),
        new TokenText(),
        new OffsetConjunctions(false,
            new int[][] {{0},
                {1},{-1,0},{0,1},
                {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3},
            }),
        new PrintInputAndTarget(),
    });

    // Print to a string
    ByteArrayOutputStream out = new ByteArrayOutputStream ();
    PrintStream oldOut = System.out;
    PrintStream capture = new PrintStream (out);
    System.setOut (capture);
    try {
        InstanceList lst = new InstanceList (p);
        lst.addThruPipe (new ArrayIterator(new String[] { TestCRF.data[0],
            TestCRF.data[1], }));
    } finally {
        // Push any pending output into the buffer, then always put the real
        // stdout back, even when the pipe throws.
        capture.flush ();
        System.setOut (oldOut);
    }

    assertEquals (spacePipeOutput, out.toString());
}