本文整理汇总了Java中cc.mallet.pipe.Pipe类的典型用法代码示例。如果您正苦于以下问题：Java Pipe类的具体用法？Java Pipe怎么用？Java Pipe使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Pipe类属于cc.mallet.pipe包，在下文中一共展示了Pipe类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testSpaceViewer
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains a CRF on the first TestCRF sample, runs extraction over the second
 * sample, and hands the extraction to DocumentViewer together with
 * {@code outputDir}.
 *
 * @throws IOException declared by the original test; presumably raised while
 *         the viewer output is written -- TODO confirm
 */
public void testSpaceViewer() throws IOException {
    final Pipe spacePipe = TestMEMM.makeSpacePredictionPipe();
    final String[] trainDocs = { TestCRF.data[0] };
    final String[] testDocs = { TestCRF.data[1] };

    InstanceList trainingList = new InstanceList(spacePipe);
    trainingList.addThruPipe(new ArrayIterator(trainDocs));

    // This list is not read again below; pushing the data through the pipe
    // is kept exactly as in the original test.
    InstanceList testingList = new InstanceList(spacePipe);
    testingList.addThruPipe(new ArrayIterator(testDocs));

    CRF model = new CRF(spacePipe, null);
    model.addFullyConnectedStatesForLabels();
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.trainIncremental(trainingList);

    CRFExtractor extractor = TestLatticeViewer.hackCrfExtor(model);
    Extraction extracted = extractor.extract(new ArrayIterator(testDocs));

    // Create the output directory on first use.
    if (!outputDir.exists()) {
        outputDir.mkdir();
    }
    DocumentViewer.writeExtraction(outputDir, extracted);
}
示例2: extractIndependentFeaturesHelper
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Builds an instance holding the independent features of a single instance annotation.
 *
 * <p>Every attribute listed by {@code featureInfo} is extracted into a fresh
 * {@code AugmentableFeatureVector} created over the pipe's data alphabet. Afterwards
 * the instance data is replaced by the result of {@code toFeatureVector()}. The target,
 * name and source of the returned instance are all {@code null}.
 *
 * <p>According to the original notes, the information in {@code featureInfo} gets
 * updated by the extraction, and the method is static so that it can also be used
 * from the CorpusRepresentationMalletSeq class.
 *
 * @param instanceAnnotation the annotation whose features are extracted
 * @param inputAS the annotation set handed to the feature extraction
 * @param featureInfo supplies the attributes to extract
 * @param pipe the pipe whose data alphabet backs the feature vector
 * @return the newly created instance carrying the extracted features as its data
 */
static Instance extractIndependentFeaturesHelper(
        Annotation instanceAnnotation,
        AnnotationSet inputAS,
        FeatureInfo featureInfo,
        Pipe pipe) {
    // Instance constructor arguments are: data, target, name, source.
    Instance instance =
            new Instance(new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);

    for (FeatureSpecAttribute attribute : featureInfo.getAttributes()) {
        FeatureExtraction.extractFeature(instance, attribute, inputAS, instanceAnnotation);
    }

    // TODO (carried over from the original): the augmentable vector is destructively
    // replaced by a plain feature vector; it is not clear whether this is beneficial,
    // the working assumption is that it is.
    AugmentableFeatureVector augmented = (AugmentableFeatureVector) instance.getData();
    instance.setData(augmented.toFeatureVector());
    return instance;
}
示例3: CRF
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a CRF over the given pipes.
 *
 * <p>Both alphabets are read from {@code inputPipe}: its data alphabet becomes the
 * input alphabet and its target alphabet becomes the output alphabet.
 * {@code outputPipe} is only forwarded to the superclass constructor here.
 *
 * @param inputPipe pipe supplying the data and target alphabets
 * @param outputPipe pipe passed on to the superclass
 */
public CRF(Pipe inputPipe, Pipe outputPipe) {
    super(inputPipe, outputPipe);
    inputAlphabet = inputPipe.getDataAlphabet();
    outputAlphabet = inputPipe.getTargetAlphabet();
    // Growth of the input alphabet is not stopped here; a stopGrowth() call
    // was left disabled in the original.
}
示例4: testTokenAccuracy
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains a sparse-weight CRF on one half of the sample data and checks the
 * token accuracy reported for the other half against a fixed expected value.
 */
public void testTokenAccuracy() {
    Pipe spacePipe = makeSpacePredictionPipe();
    InstanceList allInstances = new InstanceList(spacePipe);
    allInstances.addThruPipe(new ArrayIterator(data));

    // Constant seed, so the 50/50 split is the same on every run.
    Random splitRandom = new Random(777);
    double[] proportions = { .5, .5 };
    InstanceList[] splits = allInstances.split(splitRandom, proportions);

    CRF model = new CRF(spacePipe.getDataAlphabet(), spacePipe.getTargetAlphabet());
    model.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(splits[0]);

    String[] descriptions = { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(splits, descriptions);
    evaluator.evaluateInstanceList(trainer, splits[1], "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
示例5: testSpaceSerializable
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains an MEMM for ten iterations, clones it via serialization, and checks
 * that two optimizables obtained from the trainer report the same value.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public void testSpaceSerializable() throws IOException, ClassNotFoundException {
    Pipe spacePipe = makeSpacePredictionPipe();
    InstanceList trainingList = new InstanceList(spacePipe);
    trainingList.addThruPipe(new ArrayIterator(data));

    MEMM model = new MEMM(spacePipe, null);
    model.addFullyConnectedStatesForLabels();
    model.addStartState();
    model.setWeightsDimensionAsIn(trainingList);

    MEMMTrainer trainer = new MEMMTrainer(model);
    trainer.train(trainingList, 10);

    // NOTE(review): the clone is never read afterwards, and both optimizables
    // below come from the trainer of the original model. As written, this only
    // verifies that the round trip completes without throwing -- confirm
    // whether the second value was meant to be computed from the clone.
    MEMM clonedModel = (MEMM) TestSerializable.cloneViaSerialization(model);

    double firstValue = trainer.getOptimizableMEMM(trainingList).getValue();
    double secondValue = trainer.getOptimizableMEMM(trainingList).getValue();

    assertEquals(firstValue, secondValue, 1e-5);
}
示例6: makeSpacePredictionPipe
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Assembles the serial pipe used by the space-prediction tests.
 *
 * @return a SerialPipes instance chaining the stages listed in the body, ending in
 *         TokenSequence2FeatureVectorSequence
 */
public static Pipe makeSpacePredictionPipe() {
    // Offset sets currently enabled for OffsetConjunctions. The original also
    // carried several wider sets (e.g. {-2, -1, 0}, {0, 1, 2}) that were commented out.
    int[][] conjunctionOffsets = { {1}, {-1, 0}, {0, 1} };

    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenSequenceLowercase(),
        new TestMEMMTokenSequenceRemoveSpaces(),
        new TokenText(),
        new OffsetConjunctions(true, conjunctionOffsets),
        // A PrintInputAndTarget stage was disabled at this position in the original.
        new TokenSequence2FeatureVectorSequence()
    };
    return new SerialPipes(stages);
}
示例7: InstanceList
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a list consisting of randomly-generated <code>FeatureVector</code>s.
 *
 * <p>The list is set up with a serial pipe (token sequence to feature sequence,
 * feature sequence to feature vector, target to label) and then filled from a
 * RandomTokenSequenceIterator built from the arguments.
 *
 * @param r the generator of all randomness used here
 * @param classCentroidDistribution Dirichlet for the class centroids; includes an Alphabet
 * @param classCentroidAverageAlphaMean Gaussian mean on the sum of alphas
 * @param classCentroidAverageAlphaVariance Gaussian variance on the sum of alphas
 * @param featureVectorSizePoissonLambda forwarded to the iterator; presumably the
 *        Poisson lambda for feature vector sizes -- TODO confirm
 * @param classInstanceCountPoissonLambda forwarded to the iterator; presumably the
 *        Poisson lambda for per-class instance counts -- TODO confirm
 * @param classNames forwarded to the iterator
 */
// xxx Perhaps split these out into a utility class
public InstanceList(Randoms r,
                    Dirichlet classCentroidDistribution,
                    double classCentroidAverageAlphaMean,
                    double classCentroidAverageAlphaVariance,
                    double featureVectorSizePoissonLambda,
                    double classInstanceCountPoissonLambda,
                    String[] classNames) {
    this(new SerialPipes(new Pipe[] {
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
        new Target2Label()
    }));

    Iterator<Instance> randomInstances = new RandomTokenSequenceIterator(
            r,
            classCentroidDistribution,
            classCentroidAverageAlphaMean,
            classCentroidAverageAlphaVariance,
            featureVectorSizePoissonLambda,
            classInstanceCountPoissonLambda,
            classNames);
    addThruPipe(randomInstances);
}
示例8: MaxEnt
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a MaxEnt classifier over {@code dataPipe}.
 *
 * <p>When {@code parameters} is {@code null}, a fresh array of
 * {@code getNumParameters(dataPipe)} entries is allocated instead. The default
 * feature index is set to the size of the pipe's data alphabet.
 *
 * @param dataPipe pipe passed to the superclass and used to size the model
 * @param parameters parameter array to adopt as-is, or {@code null} to allocate one
 * @param featureSelection global feature selection, may be {@code null}
 * @param perClassFeatureSelection per-class feature selections, may be {@code null};
 *        an assertion rejects supplying both kinds of selection at once
 */
public MaxEnt(Pipe dataPipe,
              double[] parameters,
              FeatureSelection featureSelection,
              FeatureSelection[] perClassFeatureSelection) {
    super(dataPipe);
    // The two kinds of feature selection are mutually exclusive.
    assert !(featureSelection != null && perClassFeatureSelection != null);

    this.parameters = (parameters != null)
            ? parameters
            : new double[getNumParameters(dataPipe)];
    this.featureSelection = featureSelection;
    this.perClassFeatureSelection = perClassFeatureSelection;
    this.defaultFeatureIndex = dataPipe.getDataAlphabet().size();
}
示例9: testMultiTagSerialization
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Clones a tagging pipe via serialization and checks that the clone still
 * produces the "time" conjunction feature on the expected tokens of {@code doc1}.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public static void testMultiTagSerialization() throws IOException, ClassNotFoundException {
    Pipe[] stages = {
        new SimpleTaggerSentence2TokenSequence(),
        new TokenText(),
        new RegexMatches("digits", Pattern.compile("[0-9]+")),
        new RegexMatches("ampm", Pattern.compile("[aApP][mM]")),
        new OffsetFeatureConjunction("time",
                new String[] { "digits", "ampm" },
                new int[] { 0, 1 },
                true),
        new PrintInputAndTarget()
    };
    Pipe originalPipe = new SerialPipes(stages);

    // Only the deserialized copy is exercised from here on.
    Pipe clonedPipe = (Pipe) TestSerializable.cloneViaSerialization(originalPipe);
    InstanceList clonedList = new InstanceList(clonedPipe);
    clonedList.addThruPipe(new ArrayIterator(doc1));

    TokenSequence tokens = (TokenSequence) clonedList.get(0).getData();
    assertEquals(6, tokens.size());
    assertEquals(1.0, tokens.get(3).getFeatureValue("time"), 1e-15);
    assertEquals(1.0, tokens.get(4).getFeatureValue("time"), 1e-15);
}
示例10: concatenatePipes
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Chains two pipes into a SerialPipes and installs the combined alphabets on it.
 *
 * <p>The data and target alphabets come from {@code combinedDataDicts} and
 * {@code combinedTargetDicts}. Each alphabet is stored on the result even when it
 * is {@code null}; the matching "resolved" flag is set only for a non-null alphabet.
 *
 * @param p1 the first pipe of the chain
 * @param p2 the second pipe of the chain
 * @return the serial pipe running {@code p1} followed by {@code p2}
 */
public static Pipe concatenatePipes(Pipe p1, Pipe p2) {
    final Alphabet mergedData = combinedDataDicts(p1, p2);
    final Alphabet mergedTarget = combinedTargetDicts(p1, p2);

    final Pipe combined = new SerialPipes(new Pipe[] { p1, p2 });

    combined.dataAlphabet = mergedData;
    if (mergedData != null) {
        combined.dataAlphabetResolved = true;
    }

    combined.targetAlphabet = mergedTarget;
    if (mergedTarget != null) {
        combined.targetAlphabetResolved = true;
    }

    return combined;
}
示例11: testConcatenatePipes
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Checks that concatenating a StupidPipe with an already-initialized tagger pipe
 * yields a pipe whose data alphabet is the very same object as the tagger pipe's.
 */
public void testConcatenatePipes() {
    Pipe first = new StupidPipe();
    Pipe second = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence();

    // Push one instance through the second pipe so that its dictionary is initialized.
    second.instanceFrom(new Instance(data, null, null, null));
    assertEquals(3, second.getDataAlphabet().size());

    Pipe concatenated = PipeUtils.concatenatePipes(first, second);
    Alphabet concatenatedDict = concatenated.getDataAlphabet();

    assertEquals(3, concatenatedDict.size());
    // Identity, not equality: the alphabet object itself must be shared.
    assertTrue(concatenatedDict == second.getDataAlphabet());
}
示例12: testParenGroupIterator
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Feeds a string containing three top-level parenthesized groups through a
 * ParenGroupIterator and checks the data of each resulting instance.
 */
public void testParenGroupIterator() {
    String text = "(a (b c) ((d)) ) f\n\n (3\n 4) ( 6) ";
    String[] expectedGroups = { "(a (b c) ((d)) )", "(3\n 4)", "( 6)" };

    Reader source = new StringReader(text);
    ParenGroupIterator groupIterator = new ParenGroupIterator(source);

    Pipe noop = new Noop();
    noop.setTargetProcessing(false);

    InstanceList groups = new InstanceList(noop);
    groups.addThruPipe(groupIterator);

    assertEquals(3, groups.size());
    for (int i = 0; i < expectedGroups.length; i++) {
        assertEquals(expectedGroups[i], groups.get(i).getData());
    }
}
示例13: main
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains an ACRF from a training file, passing a testing file to the trainer,
 * and writes the trained model as a gzipped object.
 *
 * <p>Arguments, in order: the training file, the testing file, and the file the
 * model is written to. All three are required; no argument validation is done.
 *
 * @param args command-line arguments as described above
 * @throws FileNotFoundException if the training or testing file cannot be opened
 */
public static void main(String[] args) throws FileNotFoundException {
    File trainFile = new File(args[0]);
    File testFile = new File(args[1]);
    File crfFile = new File(args[2]);

    Pipe pipe = new SerialPipes(new Pipe[] {
        new GenericAcrfData2TokenSequence(2),
        new TokenSequence2FeatureVectorSequence(true, true),
    });

    // Each file's reader is closed as soon as its instances have been loaded,
    // rather than staying open for the whole training run.
    InstanceList training = loadLineGroupInstances(pipe, trainFile);
    InstanceList testing = loadLineGroupInstances(pipe, testFile);

    ACRF.Template[] tmpls = new ACRF.Template[] {
        new ACRF.BigramTemplate(0),
        new ACRF.BigramTemplate(1),
        new ACRF.PairwiseFactorTemplate(0, 1),
        new CrossTemplate1(0, 1)
    };

    ACRF acrf = new ACRF(pipe, tmpls);
    ACRFTrainer trainer = new DefaultAcrfTrainer();
    // NOTE(review): 99999 is presumably an iteration cap high enough to let the
    // trainer stop on its own criterion -- confirm against ACRFTrainer.
    trainer.train(acrf, training, null, testing, 99999);
    FileUtils.writeGzippedObject(crfFile, acrf);
}

/**
 * Loads the line groups of one file through {@code pipe} into a new instance list
 * and closes the file reader afterwards, whether or not loading succeeded.
 *
 * @param pipe the pipe every line group is passed through
 * @param file the file to read
 * @return the populated instance list
 * @throws FileNotFoundException if {@code file} cannot be opened
 */
private static InstanceList loadLineGroupInstances(Pipe pipe, File file)
        throws FileNotFoundException {
    InstanceList instances = new InstanceList(pipe);
    FileReader reader = new FileReader(file);
    try {
        instances.addThruPipe(new LineGroupIterator(reader,
                Pattern.compile("\\s*"),
                true));
        return instances;
    } finally {
        try {
            reader.close();
        } catch (java.io.IOException ignored) {
            // Deliberately ignored: the stream was only read from, so a failed
            // close cannot lose data, and it must not mask an exception from loading.
        }
    }
}
示例14: ACRF
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
* Create a ACRF for a 1-d sequence. Needs an array
* of Templates.
*/
public ACRF (Pipe inputPipe, Template[] tmpls)
throws IllegalArgumentException
{
this.inputPipe = inputPipe;
this.templates = tmpls;
this.inputAlphabet = inputPipe.getDataAlphabet();
this.defaultFeatureIndex = inputAlphabet.size ();
for (int tidx = 0; tidx < templates.length; tidx++) templates [tidx].index = tidx;
}
示例15: setPipes
import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Sets the tokenization and feature pipes of this trainer.
 *
 * <p>The stored feature pipe is the concatenation of a new RememberTokenizationPipe
 * followed by {@code featurePipe}; {@code tokPipe} is stored unchanged.
 *
 * @param tokPipe the tokenization pipe
 * @param featurePipe the feature pipe to append after the RememberTokenizationPipe
 * @return this trainer, so that calls can be chained
 */
public ACRFExtractorTrainer setPipes(Pipe tokPipe, Pipe featurePipe) {
    this.featurePipe =
            PipeUtils.concatenatePipes(new RememberTokenizationPipe(), featurePipe);
    this.tokPipe = tokPipe;
    return this;
}