本文整理汇总了Java中cc.mallet.pipe.TokenSequence2FeatureVectorSequence类的典型用法代码示例。如果您正苦于以下问题：Java TokenSequence2FeatureVectorSequence类的具体用法？Java TokenSequence2FeatureVectorSequence怎么用？Java TokenSequence2FeatureVectorSequence使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
TokenSequence2FeatureVectorSequence类属于cc.mallet.pipe包，在下文中一共展示了TokenSequence2FeatureVectorSequence类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testPrint
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Smoke test for {@code CRF.print()}.
 *
 * <p>Runs the single string "ABCDE" through a five-stage pipeline, builds a
 * CRF on the resulting instance list, overwrites every optimizable parameter
 * with its own index and prints the model. No assertions are made.
 */
public void testPrint() {
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    // A one-instance data set built from a literal string.
    InstanceList instances = new InstanceList(pipeline);
    instances.addThruPipe(new ArrayIterator(new String[] { "ABCDE", }));

    CRF model = new CRF(pipeline, null);
    model.addFullyConnectedStatesForThreeQuarterLabels(instances);
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    model.setWeightsDimensionAsIn(instances, false);

    Optimizable optimizable = trainer.getOptimizableCRF(instances);
    int parameterCount = optimizable.getNumParameters();
    double[] values = new double[parameterCount];
    // Give each parameter a distinct, recognisable value: its own position.
    int index = 0;
    while (index < parameterCount) {
        values[index] = index;
        index++;
    }
    optimizable.setParameters(values);

    model.print();
}
示例2: makeSpacePredictionPipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Creates the pipeline used by the space-prediction test.
 *
 * <p>Stages, in order: tokenise with the pattern ".", lowercase, remove
 * spaces, add token text, add offset conjunctions, and finally convert the
 * token sequence into a feature vector sequence.
 *
 * @return a {@link SerialPipes} wrapping the six stages
 */
public static Pipe makeSpacePredictionPipe ()
{
    // Offset conjunctions currently in use. Alternatives that were tried and
    // left disabled in the original code:
    //   {0}
    //   {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}
    //   {-2,-1}, {-1,0}, {0,1}, {1,2}
    //   {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}
    int[][] offsets = {
        {1}, {-1, 0}, {0, 1},
    };

    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, offsets),
        // A PrintInputAndTarget stage can be inserted here for debugging.
        new TokenSequence2FeatureVectorSequence(),
    };
    return new SerialPipes(stages);
}
示例3: disabledtestPrint
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Smoke test for {@code MEMM.print()}.
 *
 * <p>Builds a MEMM over the single string "ABCDE", sets each optimizable
 * parameter to its own index and prints the model. The method name carries a
 * "disabled" prefix; presumably this keeps a name-based test runner from
 * picking it up - confirm against the test harness.
 */
public void disabledtestPrint ()
{
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes (stages);

    // A one-instance data set built from a literal string.
    InstanceList instances = new InstanceList (pipeline);
    instances.addThruPipe (new ArrayIterator (new String[] { "ABCDE", }));

    MEMM model = new MEMM (pipeline, null);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn (instances);

    MEMMTrainer trainer = new MEMMTrainer (model);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(instances);

    // Fill the parameter vector with 0, 1, 2, ... so each weight is distinct.
    double[] values = new double[optimizable.getNumParameters()];
    for (int slot = values.length - 1; slot >= 0; slot--) {
        values[slot] = slot;
    }
    optimizable.setParameters (values);

    model.print ();
}
示例4: build
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the tagging pipeline and returns it as a {@link SerialPipes}.
 *
 * <p>The base pipeline is: sentence-to-token-sequence conversion, whatever
 * {@code addFeatures()} contributes, and a
 * {@code TokenSequence2FeatureVectorSequence(true, false)} stage.
 *
 * <p>When {@code useSCL} is set, that base pipeline is first used to load the
 * source and target instances, an SCL model is trained on them, and the final
 * stage is replaced by {@code TokenSequence2FeatureVectorSequence(false, true)}
 * followed by an {@code SCLAugment} stage built from the trained model.
 *
 * @return the assembled pipeline
 */
public Pipe build() {
    pipes = new LinkedList<Pipe>();
    pipes.add(new SimpleTaggerSentence2TokenSequence(false));
    addFeatures();
    pipes.add(new TokenSequence2FeatureVectorSequence(true, false));

    if (!useSCL) {
        return new SerialPipes(pipes);
    }

    // Load both domains through the base pipeline to train the SCL model.
    Pipe basePipeline = new SerialPipes(pipes);
    InstanceList sourceInstances = new InstanceList(basePipeline);
    sourceInstances.addThruPipe(sclSourceIt);
    InstanceList targetInstances = new InstanceList(basePipeline);
    targetInstances.addThruPipe(sclTargetIt);
    SCL trainedScl = trainSCL(sourceInstances, targetInstances);

    // Swap the last stage for the (false, true) variant, then append the
    // SCL augmentation stage.
    pipes.removeLast();
    pipes.add(new TokenSequence2FeatureVectorSequence(false, true));
    pipes.add(new SCLAugment(trainedScl));

    return new SerialPipes(pipes);
}
示例5: disabledtestPrint
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds a MEMM from the one-string data set "ABCDE", assigns the value
 * {@code i} to the i-th optimizable parameter, and prints the model.
 * Performs no assertions; it only exercises {@code MEMM.print()}.
 */
public void disabledtestPrint ()
{
    Pipe[] pipeStages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes (pipeStages);

    String[] rawInput = { "ABCDE", };
    InstanceList dataset = new InstanceList (pipeline);
    dataset.addThruPipe (new ArrayIterator (rawInput));

    MEMM memm = new MEMM (pipeline, null);
    memm.addFullyConnectedStatesForLabels();
    memm.setWeightsDimensionAsIn (dataset);

    MEMMTrainer memmTrainer = new MEMMTrainer (memm);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood objective = memmTrainer.getOptimizableMEMM(dataset);

    // Parameter k receives the value k.
    int total = objective.getNumParameters();
    double[] weights = new double[total];
    int k = 0;
    while (k < total) {
        weights[k] = k;
        k++;
    }
    objective.setParameters (weights);

    memm.print ();
}
示例6: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the feature-extraction pipeline.
 *
 * <p>A fresh {@code Alphabet} is shared by the token-sequence conversion stage
 * and the final feature-vector stage; the label alphabet is taken from the
 * {@code Target2LabelSequence} stage that ends the pipeline.
 *
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    // Stages left disabled in the original code: SurroundingTokenFeature(false),
    // SurroundingTokenFeature(true), NeighborShapeFeature(true, makeShapeNeighs()),
    // and AppendEndPipe (which was meant to sit right before TokenSequenceToFeature).
    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        new SWordConverterPipe(),
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, labelAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new PhoneNeighborPipe(true, makeNeighbors()),
        new PhoneClassPipe(true, makeClassNeighbors()),
        new VowelNeighborPipe(),
        new IsFirstPipe(),
        new ThisPhoneClassPipe(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例7: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the feature-extraction pipeline.
 *
 * <p>The same new {@code Alphabet} is handed to the token-sequence conversion
 * stage and to the closing {@code TokenSequence2FeatureVectorSequence} stage,
 * which is created with the flags {@code (true, true)}.
 *
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    // SurroundingTokenFeature(true) was left disabled in the original code.
    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, labelAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
        targetStage
    );
    return new SerialPipes(stages);
}
示例8: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the feature-extraction pipeline for the stress model.
 *
 * <p>Unlike a sequence-labelling pipeline, this one ends in a
 * {@code Target2Label} stage; its target alphabet is passed to the opening
 * {@code AlignToStressPipe} together with a fresh feature {@code Alphabet}
 * and an empty list of {@code StressFeature}s.
 *
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2Label targetStage = new Target2Label();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new AlignToStressPipe(featureAlphabet, labelAlphabet, ImmutableList.<StressFeature>of()),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new SurroundingTokenFeature(true),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例9: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the feature-extraction pipeline.
 *
 * <p>Both {@code SurroundingTokenFeature} variants ({@code false} and
 * {@code true}) are active, and the closing
 * {@code TokenSequence2FeatureVectorSequence} stage is created with the flags
 * {@code (true, false)} on the same {@code Alphabet} that the first stage uses.
 *
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, labelAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new SurroundingTokenFeature(true),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}
示例10: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the feature-extraction pipeline.
 *
 * <p>After the usual token and feature stages and the
 * {@code Target2LabelSequence} stage, a final
 * {@code LabelSequenceToLabelsAssignment} stage is appended; it receives both
 * the feature alphabet and the label alphabet.
 *
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private static Pipe makePipe() {
    Alphabet featureAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(featureAlphabet, labelAlphabet),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(featureAlphabet, true, true),
        targetStage,
        new LabelSequenceToLabelsAssignment(featureAlphabet, labelAlphabet)
    );
    return new SerialPipes(stages);
}
示例11: main
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Trains an ACRF on one file, passes a second file to the trainer as test
 * data, and writes the trained model, gzipped, to a third file.
 *
 * <p>Both input files are read as line groups separated by lines matching
 * {@code \s*}; each reader is closed as soon as its instances are loaded.
 *
 * @param args {@code args[0]} training data file, {@code args[1]} test data
 *             file, {@code args[2]} output file for the serialized ACRF
 * @throws FileNotFoundException if the training or test file cannot be opened
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 */
public static void main (String[] args) throws FileNotFoundException
{
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException (
            "Expected 3 arguments: <trainFile> <testFile> <crfFile>");
    }
    File trainFile = new File (args[0]);
    File testFile = new File (args[1]);
    File crfFile = new File (args[2]);

    Pipe pipe = new SerialPipes (new Pipe[] {
        new GenericAcrfData2TokenSequence (2),
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    InstanceList training = new InstanceList (pipe);
    FileReader trainReader = new FileReader (trainFile);
    try {
        training.addThruPipe (new LineGroupIterator (trainReader,
                                                     Pattern.compile ("\\s*"),
                                                     true));
    } finally {
        try {
            trainReader.close ();
        } catch (java.io.IOException ignored) {
            // The data has already been read (or a more useful exception is
            // already propagating); a failed close of a reader is harmless.
        }
    }

    InstanceList testing = new InstanceList (pipe);
    FileReader testReader = new FileReader (testFile);
    try {
        testing.addThruPipe (new LineGroupIterator (testReader,
                                                    Pattern.compile ("\\s*"),
                                                    true));
    } finally {
        try {
            testReader.close ();
        } catch (java.io.IOException ignored) {
            // Same reasoning as for the training reader.
        }
    }

    ACRF.Template[] tmpls = new ACRF.Template[] {
        new ACRF.BigramTemplate (0),
        new ACRF.BigramTemplate (1),
        new ACRF.PairwiseFactorTemplate (0,1),
        new CrossTemplate1 (0,1)
    };

    ACRF acrf = new ACRF (pipe, tmpls);
    ACRFTrainer trainer = new DefaultAcrfTrainer ();
    // NOTE(review): the null argument is presumably the validation set and
    // 99999 an iteration cap - confirm against ACRFTrainer.train.
    trainer.train (acrf, training, null, testing, 99999);

    FileUtils.writeGzippedObject (crfFile, acrf);
}
示例12: buildSerialPipes
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Assembles the pipeline from configured feature names, target replacements
 * and offset conjunctions.
 *
 * <p>Each conjunction string is a list of integer offsets separated by
 * {@code ';'}, in which the text {@code "min"} stands for a minus sign; for
 * example {@code "min1;0"} becomes the offsets {@code {-1, 0}}.
 *
 * @param featureNames names looked up through a {@code FeaturePipeProvider};
 *                     the resulting pipes are added in list order
 * @param replacements passed to the {@code TargetReplacementPipe} stage
 * @param conjunctions offset-conjunction specifications, one per entry
 * @return the assembled pipeline
 * @throws LangDetectException declared by the signature; presumably raised
 *         while creating feature pipes - confirm against FeaturePipeProvider
 * @throws IOException declared by the signature; presumably raised while
 *         creating feature pipes - confirm against FeaturePipeProvider
 * @throws NumberFormatException if an offset is not a valid integer
 */
private SerialPipes buildSerialPipes(List<String> featureNames, List<String> replacements,
        List<String> conjunctions) throws LangDetectException, IOException {
    ArrayList<Pipe> stages = new ArrayList<Pipe>();

    // Fixed front of the pipeline.
    stages.add(new LineGroupString2TokenSequence());
    stages.add(new AddTargetToLinePipe(6));
    stages.add(new LineToTargetTextPipe());
    stages.add(new TargetReplacementPipe(replacements));

    // One pipe per configured feature name.
    FeaturePipeProvider provider = new FeaturePipeProvider();
    for (String name : featureNames) {
        stages.add(provider.getPipe(name));
    }

    // Translate every textual conjunction into its array of integer offsets.
    int[][] offsets = new int[conjunctions.size()][];
    int row = 0;
    for (String spec : conjunctions) {
        String[] parts = spec.replace("min", "-").split(";");
        int[] parsed = new int[parts.length];
        for (int col = 0; col < parts.length; col++) {
            parsed[col] = Integer.parseInt(parts[col]);
        }
        offsets[row] = parsed;
        row++;
    }

    // Fixed tail of the pipeline. A PrintInputAndTarget stage can be appended
    // after Target2LabelSequence for debugging.
    stages.add(new OffsetConjunctions(offsets));
    stages.add(new TokenSequence2FeatureVectorSequence(false, false));
    stages.add(new Target2LabelSequence());

    return new SerialPipes(stages);
}
示例13: main
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Trains an ACRF on one file, passes a second file to the trainer as test
 * data, and writes the trained model, gzipped, to a third file.
 *
 * <p>Both input files are read as line groups separated by lines matching
 * {@code \s*}. Training and test data are kept in separate instance lists,
 * and each reader is closed as soon as its instances are loaded.
 *
 * @param args {@code args[0]} training data file, {@code args[1]} test data
 *             file, {@code args[2]} output file for the serialized ACRF
 * @throws FileNotFoundException if the training or test file cannot be opened
 * @throws IllegalArgumentException if fewer than three arguments are supplied
 */
public static void main (String[] args) throws FileNotFoundException
{
    if (args == null || args.length < 3) {
        throw new IllegalArgumentException (
            "Expected 3 arguments: <trainFile> <testFile> <crfFile>");
    }
    File trainFile = new File (args[0]);
    File testFile = new File (args[1]);
    File crfFile = new File (args[2]);

    Pipe pipe = new SerialPipes (new Pipe[] {
        new GenericAcrfData2TokenSequence (2),
        new TokenSequence2FeatureVectorSequence (true, true),
    });

    InstanceList training = new InstanceList (pipe);
    FileReader trainReader = new FileReader (trainFile);
    try {
        training.addThruPipe (new LineGroupIterator (trainReader,
                                                     Pattern.compile ("\\s*"),
                                                     true));
    } finally {
        try {
            trainReader.close ();
        } catch (java.io.IOException ignored) {
            // The data has already been read (or a more useful exception is
            // already propagating); a failed close of a reader is harmless.
        }
    }

    // The test file must be loaded into `testing`, not `training`: otherwise
    // the test data leaks into the training set and the trainer is handed an
    // empty test list.
    InstanceList testing = new InstanceList (pipe);
    FileReader testReader = new FileReader (testFile);
    try {
        testing.addThruPipe (new LineGroupIterator (testReader,
                                                    Pattern.compile ("\\s*"),
                                                    true));
    } finally {
        try {
            testReader.close ();
        } catch (java.io.IOException ignored) {
            // Same reasoning as for the training reader.
        }
    }

    ACRF.Template[] tmpls = new ACRF.Template[] {
        new ACRF.BigramTemplate (0),
        new ACRF.BigramTemplate (1),
        new ACRF.PairwiseFactorTemplate (0,1),
        new CrossTemplate1 (0,1)
    };

    ACRF acrf = new ACRF (pipe, tmpls);
    ACRFTrainer trainer = new DefaultAcrfTrainer ();
    // NOTE(review): the null argument is presumably the validation set and
    // 99999 an iteration cap - confirm against ACRFTrainer.train.
    trainer.train (acrf, training, null, testing, 99999);

    FileUtils.writeGzippedObject (crfFile, acrf);
}
示例14: makeSpacePredictionPipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Creates the pipeline used by the CRF space-prediction test.
 *
 * <p>Stages, in order: tokenise with the pattern ".", lowercase, remove
 * spaces, add token text, add offset conjunctions, and finally convert the
 * token sequence into a feature vector sequence.
 *
 * @return a {@link SerialPipes} wrapping the six stages
 */
private Pipe makeSpacePredictionPipe() {
    // Active offset conjunctions.
    //
    // History carried over from the original comments:
    //  - The original test also used {1}, {-1,0}, {0,1}; {0, 1} is currently off.
    //  - Note from akm (12/2007): leaving out the set below makes the test run
    //    faster, but the expected likelihood and accuracy values would then
    //    need adjusting. It is currently disabled.
    //    TODO re-enable: {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}
    //  - Disabled even before that:
    //    {-2,-1}, {-1,0}, {0,1}, {1,2},
    //    {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}
    int[][] offsets = {
        { 0 },
        { 1 },
        { -1, 0 },
    };

    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, offsets),
        // A PrintInputAndTarget stage can be inserted here for debugging.
        new TokenSequence2FeatureVectorSequence(),
    };
    return new SerialPipes(stages);
}
示例15: makePipe
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence; //导入依赖的package包/类
/**
 * Builds the syllable-aware feature-extraction pipeline on top of a
 * caller-supplied feature alphabet.
 *
 * @param alpha alphabet handed to both the opening
 *              {@code AlignmentToTokenSequence} stage and the closing
 *              {@code TokenSequence2FeatureVectorSequence} stage
 * @return a {@link SerialPipes} wrapping the stages in the order listed below
 */
private SerialPipes makePipe(Alphabet alpha) {
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labelAlphabet = (LabelAlphabet) targetStage.getTargetAlphabet();

    // Stages that were tried and left disabled in the original code:
    //   WindowFeature(false, 4), WindowFeature(true, 6),
    //   NearSyllFeature(true), NearSyllFeature(false),
    //   SyllMarkingFeature(), SyllSequenceFeature(), SyllRelativeMarkFeature(),
    //   SonorityFeature2(true), SonorityFeature2(false),
    //   VowelWindowFeature(3, 2, "LSTSYL_", 0, true),
    //   SurroundingTokenFeature2(true, 1, 1), SurroundingTokenFeature2(false, 3, 2),
    //   SurroundingTokenFeature2(true, 4, 4), LeadingTrailingFeature()
    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new AlignmentToTokenSequence(alpha, labelAlphabet, true, true, false),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new NeighborSyllableFeature(-2, -1, 1, 2),
        new SyllCountingFeature(),
        new SyllCharRoleFeature(),
        new EndingVowelFeature(),
        new VowelWindowFeature(2, 1, "PRESYL_", -1, false),
        new VowelWindowFeature(2, 1, "PSTSYL_", 1, false),
        new SurroundingTokenFeature2(false, 1, 1),
        new SurroundingTokenFeature2(false, 2, 2),
        new SurroundingTokenFeature2(true, 3, 3),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(alpha, true, false),
        targetStage
    );
    return new SerialPipes(stages);
}