本文整理汇总了Java中cc.mallet.pipe.SerialPipes类的典型用法代码示例。如果您正苦于以下问题：Java SerialPipes类的具体用法？Java SerialPipes怎么用？Java SerialPipes使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
SerialPipes类属于cc.mallet.pipe包，在下文中一共展示了SerialPipes类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testPrint
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds a CRF over the single string "ABCDE" (tokenized with the regex "."),
 * sets every model parameter to its own index, and prints the resulting CRF.
 */
public void testPrint() {
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    // A one-element training set pushed through the pipeline above.
    InstanceList instances = new InstanceList(pipeline);
    String[] samples = { "ABCDE" };
    instances.addThruPipe(new ArrayIterator(samples));

    CRF model = new CRF(pipeline, null);
    model.addFullyConnectedStatesForThreeQuarterLabels(instances);
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    model.setWeightsDimensionAsIn(instances, false);

    // Give each parameter a distinct, predictable value: its own position.
    Optimizable optimizable = trainer.getOptimizableCRF(instances);
    double[] indexed = new double[optimizable.getNumParameters()];
    int position = 0;
    while (position < indexed.length) {
        indexed[position] = position;
        position++;
    }
    optimizable.setParameters(indexed);

    model.print();
}
示例2: makeSpacePredictionPipe
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Assembles the pipe used for space prediction: regex "." tokenization,
 * lowercasing, space removal, token-text features, offset conjunctions,
 * and conversion to a feature vector sequence.
 *
 * @return the assembled serial pipe
 */
public static Pipe makeSpacePredictionPipe ()
{
    // Active offset conjunctions; wider windows such as {-2, -1, 0} or
    // {0, 1, 2} are not enabled.
    int[][] conjunctionOffsets = {
        {1},
        {-1, 0},
        {0, 1},
    };

    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        new TokenSequence2FeatureVectorSequence(),
    };
    return new SerialPipes(stages);
}
示例3: disabledtestPrint
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds a MEMM over the single string "ABCDE" (tokenized with the regex "."),
 * sets every model parameter to its own index, and prints the resulting model.
 */
public void disabledtestPrint ()
{
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    // A one-element training set pushed through the pipeline above.
    InstanceList instances = new InstanceList(pipeline);
    String[] samples = { "ABCDE" };
    instances.addThruPipe(new ArrayIterator(samples));

    MEMM model = new MEMM(pipeline, null);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn(instances);

    MEMMTrainer trainer = new MEMMTrainer(model);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable =
        trainer.getOptimizableMEMM(instances);

    // Give each parameter a distinct, predictable value: its own position.
    double[] indexed = new double[optimizable.getNumParameters()];
    for (int slot = indexed.length - 1; slot >= 0; slot--) {
        indexed[slot] = slot;
    }
    optimizable.setParameters(indexed);

    model.print();
}
示例4: InstanceList
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Creates a list consisting of randomly-generated <code>FeatureVector</code>s.
 *
 * <p>The list is backed by a pipe of TokenSequence2FeatureSequence,
 * FeatureSequence2FeatureVector and Target2Label, and is filled from a
 * RandomTokenSequenceIterator built from the arguments below.
 *
 * @param r the generator of all randomness used here
 * @param classCentroidDistribution Dirichlet that includes an Alphabet
 * @param classCentroidAverageAlphaMean Gaussian mean on the sum of alphas
 * @param classCentroidAverageAlphaVariance Gaussian variance on the sum of alphas
 * @param featureVectorSizePoissonLambda forwarded to RandomTokenSequenceIterator
 * @param classInstanceCountPoissonLambda forwarded to RandomTokenSequenceIterator
 * @param classNames forwarded to RandomTokenSequenceIterator
 */
// xxx Perhaps split these out into a utility class
public InstanceList (Randoms r,
                     Dirichlet classCentroidDistribution,
                     double classCentroidAverageAlphaMean,
                     double classCentroidAverageAlphaVariance,
                     double featureVectorSizePoissonLambda,
                     double classInstanceCountPoissonLambda,
                     String[] classNames)
{
    this (new SerialPipes (new Pipe[] {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label (),
    }));

    Iterator<Instance> randomInstances = new RandomTokenSequenceIterator (
        r,
        classCentroidDistribution,
        classCentroidAverageAlphaMean,
        classCentroidAverageAlphaVariance,
        featureVectorSizePoissonLambda,
        classInstanceCountPoissonLambda,
        classNames);
    this.addThruPipe (randomInstances);
}
示例5: testMultiTagSerialization
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Checks that a pipe containing an OffsetFeatureConjunction still produces the
 * "time" feature after being cloned via serialization: the first instance of
 * doc1 must have 6 tokens, with "time" equal to 1.0 on tokens 3 and 4.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
{
    Pipe[] stages = {
        new SimpleTaggerSentence2TokenSequence (),
        new TokenText (),
        new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
        new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
        new OffsetFeatureConjunction (
            "time",
            new String[] { "digits", "ampm" },
            new int[] { 0, 1 },
            true),
        new PrintInputAndTarget (),
    };
    Pipe original = new SerialPipes (stages);

    // Only the deserialized copy is used from here on.
    Pipe roundTripped = (Pipe) TestSerializable.cloneViaSerialization (original);
    InstanceList instances = new InstanceList (roundTripped);
    instances.addThruPipe (new ArrayIterator (doc1));

    TokenSequence tokens = (TokenSequence) instances.get (0).getData ();
    assertEquals (6, tokens.size ());
    for (int position = 3; position <= 4; position++) {
        assertEquals (1.0, tokens.get (position).getFeatureValue ("time"), 1e-15);
    }
}
示例6: concatenatePipes
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Chains two pipes into one SerialPipes and installs the alphabets obtained
 * from combinedDataDicts / combinedTargetDicts on the result.
 *
 * @param p1 the pipe that runs first
 * @param p2 the pipe that runs second
 * @return a serial pipe of p1 followed by p2, carrying the combined alphabets
 */
public static Pipe concatenatePipes (Pipe p1, Pipe p2)
{
    Alphabet mergedData = combinedDataDicts (p1, p2);
    Alphabet mergedTargets = combinedTargetDicts (p1, p2);

    Pipe chained = new SerialPipes (new Pipe[] { p1, p2 });

    // An alphabet is only flagged as resolved when a combined one exists.
    chained.dataAlphabet = mergedData;
    if (mergedData != null) {
        chained.dataAlphabetResolved = true;
    }
    chained.targetAlphabet = mergedTargets;
    if (mergedTargets != null) {
        chained.targetAlphabetResolved = true;
    }
    return chained;
}
示例7: hackCrfExtor
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Wraps a CRF in a CRFExtractor whose tokenization pipe is built from the
 * first three stages of the CRF's input pipe.
 *
 * <p>The CRF's input pipe is cast to SerialPipes, so a CRF with any other
 * kind of input pipe fails with a ClassCastException.
 *
 * @param crf the trained CRF whose input pipe supplies the tokenization stages
 * @return an extractor pairing the CRF with the three-stage tokenization pipe
 */
static CRFExtractor hackCrfExtor (CRF crf)
{
    final int numTokenizationPipes = 3;
    SerialPipes inputPipes = (SerialPipes) crf.getInputPipe ();

    Pipe[] tokenizationPipes = new Pipe [numTokenizationPipes];
    for (int i = 0; i < numTokenizationPipes; i++) {
        // Index with i: the earlier code always read getPipe(0) and relied on a
        // removePipe(0) call to advance, but that call is disabled, so the same
        // stage was being copied three times.
        tokenizationPipes[i] = inputPipes.getPipe (i);
    }
    // TODO Fix me: the copied stages are not removed from the CRF's input pipe
    // (removePipe / setParent(null) are disabled), so both pipes share them.

    Pipe tokPipe = new SerialPipes (tokenizationPipes);
    return new CRFExtractor (crf, tokPipe);
}
示例8: build
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Assembles the feature pipe. The base pipe is a
 * SimpleTaggerSentence2TokenSequence, whatever addFeatures() contributes, and
 * a TokenSequence2FeatureVectorSequence. When useSCL is set, an SCL model is
 * trained on source and target instances run through that base pipe, the final
 * stage is swapped for a differently-configured one, and an SCLAugment stage
 * is appended.
 *
 * @return a SerialPipes over the assembled stages
 */
public Pipe build() {
    pipes = new LinkedList<Pipe>();
    pipes.add(new SimpleTaggerSentence2TokenSequence(false));
    addFeatures();
    pipes.add(new TokenSequence2FeatureVectorSequence(true, false));

    if (!useSCL) {
        return new SerialPipes(pipes);
    }

    // Run both domains through the base pipe to get the SCL training data.
    Pipe basePipe = new SerialPipes(pipes);
    InstanceList sourceInstances = new InstanceList(basePipe);
    sourceInstances.addThruPipe(sclSourceIt);
    InstanceList targetInstances = new InstanceList(basePipe);
    targetInstances.addThruPipe(sclTargetIt);
    SCL trainedScl = trainSCL(sourceInstances, targetInstances);

    // Replace the last stage, then add the SCL augmentation after it.
    pipes.removeLast();
    pipes.add(new TokenSequence2FeatureVectorSequence(false, true));
    pipes.add(new SCLAugment(trainedScl));

    return new SerialPipes(pipes);
}
示例9: getInstances
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Turns a map of id-to-text entries into an InstanceList, using the pipe list
 * returned by getPipelist(stopwords). Texts shorter than 50 characters are
 * skipped. For every kept entry the id is used as target, name and source of
 * the instance.
 *
 * @param texts document texts keyed by id
 * @param stopwords handed to getPipelist when building the pipe
 * @return the instances produced from all sufficiently long texts
 * @throws IOException declared by the signature
 */
public static InstanceList getInstances(Map<String, String> texts, Collection<String> stopwords) throws IOException {
    SerialPipes pipeline = new SerialPipes(getPipelist(stopwords));
    InstanceList result = new InstanceList(pipeline);

    for (Entry<String, String> document : texts.entrySet()) {
        String docId = document.getKey();
        String body = document.getValue();
        if (body.length() >= 50) {
            result.addThruPipe(new Instance(body, docId, docId, docId));
        }
    }
    return result;
}
示例10: main
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Trains an SVM classifier with a linear kernel on "webkb-train-stemmed.txt"
 * and prints its accuracy on "webkb-test-stemmed.txt". Both files are read
 * from the working directory.
 *
 * @param args unused
 * @throws IOException if either data file cannot be opened or closed
 * @throws Exception kept from the original signature for callers
 */
public static void main(String[] args) throws IOException, Exception {
    ArrayList<Pipe> pipes = new ArrayList<Pipe>();
    pipes.add(new Target2Label());
    pipes.add(new CharSequence2TokenSequence());
    pipes.add(new TokenSequence2FeatureSequence());
    pipes.add(new FeatureSequence2FeatureVector());
    SerialPipes pipe = new SerialPipes(pipes);

    // Training and test data share the same pipe.
    InstanceList trainingInstanceList = loadCsvInstances(pipe, "webkb-train-stemmed.txt");
    InstanceList testingInstanceList = loadCsvInstances(pipe, "webkb-test-stemmed.txt");

    ClassifierTrainer trainer = new SVMClassifierTrainer(new LinearKernel());
    Classifier classifier = trainer.train(trainingInstanceList);
    System.out.println("Accuracy: " + classifier.getAccuracy(testingInstanceList));
}

/**
 * Reads one tab-separated file through the given pipe and closes the file
 * afterwards, even when reading fails.
 *
 * @param pipe the pipe each line is pushed through
 * @param fileName path of the file to read
 * @return the instances read from the file
 * @throws IOException if the file cannot be opened or closed
 */
private static InstanceList loadCsvInstances(Pipe pipe, String fileName) throws IOException {
    InstanceList instances = new InstanceList(pipe);
    FileReader reader = new FileReader(fileName);
    try {
        // Arguments after the regex are CsvIterator's data, target and uri
        // group numbers: data = group 2, target = group 1, -1 for uri.
        instances.addThruPipe(new CsvIterator(reader, "(.*)\t(.*)", 2, 1, -1));
    } finally {
        // The original never closed its readers; release the handle here.
        reader.close();
    }
    return instances;
}
示例11: main
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * For every file path given on the command line, runs the file through
 * Input2CharSequence and ChineseSequence2TokenSequence and prints a "==="
 * separator, the path, and the resulting token sequence. Any exception is
 * printed together with its stack trace and ends the loop.
 *
 * @param args paths of the files to tokenize
 */
public static void main(String[] args) {
    try {
        for (String path : args) {
            Instance raw = new Instance(new File(path), null, null, null);
            Pipe[] stages = {
                new Input2CharSequence(),
                new ChineseSequence2TokenSequence(),
            };
            SerialPipes tokenizer = new SerialPipes(stages);

            // Push the single instance through the pipe and take the result.
            Instance processed =
                tokenizer.newIteratorFrom(new SingleInstanceIterator(raw)).next();
            TokenSequence tokens = (TokenSequence) processed.getData();

            System.out.println("===");
            System.out.println(path);
            System.out.println(tokens.toString());
        }
    } catch (Exception failure) {
        System.out.println(failure);
        failure.printStackTrace();
    }
}
示例12: disabledtestPrint
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds a MEMM over the single string "ABCDE" (tokenized with the regex "."),
 * sets every model parameter to its own index, and prints the resulting model.
 */
public void disabledtestPrint ()
{
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    // A one-element training set pushed through the pipeline above.
    InstanceList instances = new InstanceList(pipeline);
    String[] samples = { "ABCDE" };
    instances.addThruPipe(new ArrayIterator(samples));

    MEMM model = new MEMM(pipeline, null);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn(instances);

    MEMMTrainer trainer = new MEMMTrainer(model);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable =
        trainer.getOptimizableMEMM(instances);

    // Give each parameter a distinct, predictable value: its own position.
    double[] indexed = new double[optimizable.getNumParameters()];
    for (int slot = indexed.length - 1; slot >= 0; slot--) {
        indexed[slot] = slot;
    }
    optimizable.setParameters(indexed);

    model.print();
}
示例13: makePipe
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds the phone-feature pipe. A fresh data Alphabet and the label
 * alphabet of a Target2LabelSequence are shared between the stages that take
 * them; the Target2LabelSequence itself runs last.
 *
 * @return a SerialPipes over the stages listed in the body
 */
private Pipe makePipe() {
    Alphabet dataAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labels = (LabelAlphabet) targetStage.getTargetAlphabet();

    // SurroundingTokenFeature, NeighborShapeFeature and AppendEndPipe are
    // not part of this pipe.
    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        new SWordConverterPipe(),
        // convert to token sequence
        new StringListToTokenSequence(dataAlphabet, labels),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new PhoneNeighborPipe(true, makeNeighbors()),
        new PhoneClassPipe(true, makeClassNeighbors()),
        new VowelNeighborPipe(),
        new IsFirstPipe(),
        new ThisPhoneClassPipe(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(dataAlphabet, true, false),
        targetStage);
    return new SerialPipes(stages);
}
示例14: makePipe
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds the token-feature pipe. A fresh data Alphabet and the label
 * alphabet of a Target2LabelSequence are shared between the stages that take
 * them; the Target2LabelSequence itself runs last.
 *
 * @return a SerialPipes over the stages listed in the body
 */
private Pipe makePipe() {
    Alphabet dataAlphabet = new Alphabet();
    Target2LabelSequence targetStage = new Target2LabelSequence();
    LabelAlphabet labels = (LabelAlphabet) targetStage.getTargetAlphabet();

    // Only SurroundingTokenFeature(false) is used; the (true) variant is
    // not part of this pipe.
    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new StringListToTokenSequence(dataAlphabet, labels),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(dataAlphabet, true, true),
        targetStage);
    return new SerialPipes(stages);
}
示例15: makePipe
import cc.mallet.pipe.SerialPipes; //导入依赖的package包/类
/**
 * Builds the stress-alignment pipe. A fresh data Alphabet and the label
 * alphabet of a Target2Label are shared between the stages that take them;
 * the Target2Label itself runs last.
 *
 * @return a SerialPipes over the stages listed in the body
 */
private Pipe makePipe() {
    Alphabet dataAlphabet = new Alphabet();
    Target2Label targetStage = new Target2Label();
    LabelAlphabet labels = (LabelAlphabet) targetStage.getTargetAlphabet();

    // The aligner is given an empty list of stress features.
    ImmutableList<StressFeature> noStressFeatures = ImmutableList.<StressFeature>of();

    ImmutableList<Pipe> stages = ImmutableList.<Pipe>of(
        // convert to token sequence
        new AlignToStressPipe(dataAlphabet, labels, noStressFeatures),
        // make all lowercase
        new TokenSequenceLowercase(),
        // grab neighboring graphemes
        new NeighborTokenFeature(true, makeNeighbors()),
        new SurroundingTokenFeature(false),
        new SurroundingTokenFeature(true),
        new NeighborShapeFeature(true, makeShapeNeighs()),
        new LeadingTrailingFeature(),
        // convert the strings in the text to features
        new TokenSequenceToFeature(),
        new TokenSequence2FeatureVectorSequence(dataAlphabet, true, false),
        targetStage);
    return new SerialPipes(stages);
}