本文整理汇总了Java中cc.mallet.types.InstanceList.addThruPipe方法的典型用法代码示例。如果您正苦于以下问题:Java InstanceList.addThruPipe方法的具体用法?Java InstanceList.addThruPipe怎么用?Java InstanceList.addThruPipe使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.InstanceList的用法示例。
在下文中一共展示了InstanceList.addThruPipe方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testSpaceViewer
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF on the first {@code TestCRF.data} string, runs extraction on the
 * second one, and writes the resulting extraction into {@code outputDir}.
 *
 * @throws IOException as declared; SOURCE does not show which call raises it
 */
public void testSpaceViewer () throws IOException
{
    Pipe spacePipe = TestMEMM.makeSpacePredictionPipe ();
    String[] trainStrings = { TestCRF.data[0] };
    String[] testStrings = { TestCRF.data[1] };

    InstanceList trainList = new InstanceList (spacePipe);
    trainList.addThruPipe (new ArrayIterator (trainStrings));

    // This list is not read afterwards, but the second string is still pushed
    // through the shared pipe before the CRF is constructed.
    InstanceList testList = new InstanceList (spacePipe);
    testList.addThruPipe (new ArrayIterator (testStrings));

    CRF model = new CRF (spacePipe, null);
    model.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood (model);
    trainer.trainIncremental (trainList);

    CRFExtractor extractor = TestLatticeViewer.hackCrfExtor (model);
    Extraction result = extractor.extract (new ArrayIterator (testStrings));

    // Create the output directory on first use.
    boolean dirPresent = outputDir.exists ();
    if (!dirPresent) {
        outputDir.mkdir ();
    }
    DocumentViewer.writeExtraction (outputDir, result);
}
示例2: testParenGroupIterator
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Feeds a string containing nested parenthesised groups through a
 * {@code ParenGroupIterator} and checks that exactly three groups come back
 * with the expected text.
 */
public void testParenGroupIterator ()
{
    String input = "(a (b c) ((d)) ) f\n\n (3\n 4) ( 6) ";
    ParenGroupIterator groupIterator = new ParenGroupIterator (new StringReader (input));

    // Pass-through pipe with target processing switched off.
    Pipe passThrough = new Noop();
    passThrough.setTargetProcessing (false);

    InstanceList groups = new InstanceList (passThrough);
    groups.addThruPipe (groupIterator);

    String[] expected = { "(a (b c) ((d)) )", "(3\n 4)", "( 6)" };
    assertEquals (expected.length, groups.size());
    for (int k = 0; k < expected.length; k++) {
        assertEquals (expected[k], groups.get(k).getData());
    }
}
示例3: testTokenAccuracy
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Splits the data in half with a fixed seed (777), trains a CRF with sparse
 * weights on the first half, and expects the token accuracy reported for the
 * second half to be 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();
    InstanceList all = new InstanceList(pipe);
    all.addThruPipe(new ArrayIterator(data));

    double[] proportions = { .5, .5 };
    InstanceList[] halves = all.split(new Random(777), proportions);

    CRF model = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    model.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(halves[0]);

    String[] listNames = { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, listNames);
    evaluator.evaluateInstanceList(trainer, halves[1], "Test");

    double testAccuracy = evaluator.getAccuracy("Test");
    assertEquals(0.9409, testAccuracy, 0.001);
}
示例4: testPrint
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a CRF over the single string "ABCDE", overwrites its parameters with
 * the sequence 0, 1, 2, ... and prints the model. Makes no assertions.
 */
public void testPrint() {
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipe = new SerialPipes(stages);

    InstanceList single = new InstanceList(pipe);
    String[] text = { "ABCDE" };
    single.addThruPipe(new ArrayIterator(text));

    CRF model = new CRF(pipe, null);
    model.addFullyConnectedStatesForThreeQuarterLabels(single);
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    model.setWeightsDimensionAsIn(single, false);

    Optimizable optimizable = trainer.getOptimizableCRF(single);
    int parameterCount = optimizable.getNumParameters();
    double[] values = new double[parameterCount];
    // Give every parameter a distinct value: its own index.
    for (int k = 0; k < values.length; k++) {
        values[k] = k;
    }
    optimizable.setParameters(values);

    model.print();
}
示例5: testSpaceSerializable
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a MEMM for 10 iterations, clones it through serialization, and
 * compares two optimizable values obtained from the trainer.
 *
 * NOTE(review): the deserialized copy is never evaluated. Both values come
 * from the same trainer wrapping the original model, so the final assertion
 * compares the model with itself. Presumably the second value was meant to
 * come from the clone -- confirm before changing.
 *
 * @throws IOException as declared; presumably from the serialization round trip
 * @throws ClassNotFoundException as declared; presumably from the serialization round trip
 */
public void testSpaceSerializable () throws IOException, ClassNotFoundException
{
    Pipe pipe = makeSpacePredictionPipe ();
    InstanceList trainList = new InstanceList (pipe);
    trainList.addThruPipe (new ArrayIterator (data));

    MEMM model = new MEMM (pipe, null);
    model.addFullyConnectedStatesForLabels ();
    model.addStartState();
    model.setWeightsDimensionAsIn(trainList);

    MEMMTrainer trainer = new MEMMTrainer (model);
    trainer.train (trainList, 10);

    // The round trip is still executed even though the clone is unused,
    // so a failure to serialize the model surfaces here.
    MEMM clonedModel = (MEMM) TestSerializable.cloneViaSerialization (model);

    Optimizable.ByGradientValue first = trainer.getOptimizableMEMM(trainList);
    double firstValue = first.getValue ();

    Optimizable.ByGradientValue second = trainer.getOptimizableMEMM(trainList);
    double secondValue = second.getValue ();

    assertEquals (firstValue, secondValue, 1e-5);
}
示例6: testMultiTagSerialization
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a tagging pipe with a "time" offset conjunction of the "digits" and
 * "ampm" regex features, clones the pipe through serialization, and checks
 * that the clone produces six tokens with the "time" feature set to 1.0 on
 * the tokens at indices 3 and 4.
 *
 * @throws IOException as declared; presumably from the serialization round trip
 * @throws ClassNotFoundException as declared; presumably from the serialization round trip
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
{
    Pattern digitRun = Pattern.compile ("[0-9]+");
    Pattern amOrPm = Pattern.compile ("[aApP][mM]");
    String[] conjoinedFeatures = { "digits", "ampm" };
    int[] conjoinedOffsets = { 0, 1 };

    Pipe[] stages = {
        new SimpleTaggerSentence2TokenSequence (),
        new TokenText (),
        new RegexMatches ("digits", digitRun),
        new RegexMatches ("ampm", amOrPm),
        new OffsetFeatureConjunction ("time", conjoinedFeatures, conjoinedOffsets, true),
        new PrintInputAndTarget (),
    };
    Pipe original = new SerialPipes (stages);

    // Only the deserialized copy is exercised from here on.
    Pipe cloned = (Pipe) TestSerializable.cloneViaSerialization (original);
    InstanceList processed = new InstanceList (cloned);
    processed.addThruPipe (new ArrayIterator (doc1));

    Instance firstInstance = processed.get (0);
    TokenSequence tokens = (TokenSequence) firstInstance.getData ();

    assertEquals (6, tokens.size ());
    for (int position = 3; position <= 4; position++) {
        assertEquals (1.0, tokens.get (position).getFeatureValue ("time"), 1e-15);
    }
}
示例7: disabledtestPrint
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a MEMM over the single string "ABCDE", overwrites its parameters
 * with the sequence 0, 1, 2, ... and prints the model. Makes no assertions.
 */
public void disabledtestPrint ()
{
    Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipe = new SerialPipes (stages);

    InstanceList single = new InstanceList (pipe);
    String[] text = { "ABCDE" };
    single.addThruPipe (new ArrayIterator (text));

    MEMM model = new MEMM (pipe, null);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn (single);

    MEMMTrainer trainer = new MEMMTrainer (model);
    MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(single);

    int parameterCount = optimizable.getNumParameters();
    double[] values = new double[parameterCount];
    // Give every parameter a distinct value: its own index.
    for (int k = 0; k < values.length; k++) {
        values[k] = k;
    }
    optimizable.setParameters (values);

    model.print ();
}
示例8: createExtractionFrom
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds an {@code Extraction} by pairing each predicted string with the true
 * string at the same index.
 *
 * Both arrays go through the same pipe. The data and target of each predicted
 * instance and the target of the matching true instance form one document
 * extraction named "TEST" followed by the index.
 *
 * @param predStrings strings carrying the predicted labelling
 * @param trueStrings strings carrying the true labelling; read at every index
 *                    for which a predicted instance exists
 * @return the extraction holding one document per predicted instance
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
    Pipe[] stages = {
        new SGML2TokenSequence (new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES ), "O"),
        new Target2LabelSequence (),
        new PrintInputAndTarget (),
    };
    Pipe pipe = new SerialPipes (stages);

    InstanceList predicted = new InstanceList (pipe);
    predicted.addThruPipe (new ArrayIterator (predStrings));
    InstanceList truth = new InstanceList (pipe);
    truth.addThruPipe (new ArrayIterator (trueStrings));

    LabelAlphabet labels = (LabelAlphabet) pipe.getTargetAlphabet ();
    Extraction result = new Extraction (null, labels);

    int docIndex = 0;
    while (docIndex < predicted.size()) {
        Instance predictedInstance = predicted.get (docIndex);
        Instance trueInstance = truth.get (docIndex);

        Tokenization tokens = (Tokenization) predictedInstance.getData ();
        Sequence predictedLabels = (Sequence) predictedInstance.getTarget ();
        Sequence trueLabels = (Sequence) trueInstance.getTarget ();

        result.addDocumentExtraction (
            new DocumentExtraction ("TEST"+docIndex, labels, tokens, predictedLabels, trueLabels, "O"));
        docIndex++;
    }
    return result;
}
示例9: annotate
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Annotates each input line with the per-state gamma probabilities computed
 * by the CRF.
 *
 * The lines are joined into one text, split into blank-line-separated groups,
 * and pushed through {@code this.pipe}. For every resulting instance a sum
 * lattice is built, and for every sequence position one annotation is created
 * from the text before the first tab of the corresponding input line.
 *
 * @param linesWithLayout input lines; the part before the first tab becomes
 *                        the annotation text
 * @return one annotation per sequence position, in order
 * @throws IOException as declared; the reader's {@code close()} can raise it
 * @throws AnalysisException as declared; SOURCE does not show which call raises it
 * @throws IllegalStateException if an instance's sequence length differs from
 *                               the number of input lines
 */
public List<ReferenceLineAnnotation> annotate(List<String> linesWithLayout) throws IOException, AnalysisException {
    // Join the input into a single text, one line-separator after every line.
    StringBuilder joined = new StringBuilder();
    for (String layoutLine : linesWithLayout) {
        joined.append(layoutLine);
        joined.append(System.lineSeparator());
    }

    BufferedReader reader = new BufferedReader(new StringReader(joined.toString()));
    InstanceList instances = new InstanceList(this.pipe);
    Pattern blankLine = Pattern.compile("^\\s*$");
    instances.addThruPipe(new LineGroupIterator(reader, blankLine, true));
    reader.close();

    List<ReferenceLineAnnotation> annotations = new ArrayList<ReferenceLineAnnotation>();
    for (Instance instance : instances) {
        @SuppressWarnings("unchecked")
        Sequence<String> tokens = (Sequence<String>) instance.getData();
        SumLatticeDefault lattice = new SumLatticeDefault(this.crf, tokens);
        Alphabet labels = this.crf.getOutputAlphabet();

        if (linesWithLayout.size() != tokens.size()) {
            throw new IllegalStateException("linesWithLayout.size()!=inputSequence.size()");
        }

        int position = 0;
        while (position < tokens.size()) {
            String firstColumn = linesWithLayout.get(position).split("\\t")[0];
            ReferenceLineAnnotation annotation = new ReferenceLineAnnotation(firstColumn);

            // NOTE(review): state indices run from 1 to labels.size() inclusive;
            // presumably index 0 is a start state -- confirm.
            for (int stateIndex = 1; stateIndex <= labels.size(); stateIndex++) {
                State state = this.crf.getState(stateIndex);
                double gamma = lattice.getGammaProbability(position + 1, state);
                annotation.addAnnotation(state.getName(), gamma);
            }

            annotations.add(annotation);
            position++;
        }
    }
    return annotations;
}
示例10: testSpaceViewer
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF on the first {@code TestCRF.data} string, extracts from the
 * second one, and renders the extraction as HTML twice: once into
 * {@code htmlFile} and once, with the extra boolean flag set, into
 * {@code latticeFile}.
 *
 * Each output stream is closed in a {@code finally} block so the file handle
 * is released even when rendering throws.
 *
 * @throws FileNotFoundException if either output file cannot be opened
 */
public void testSpaceViewer () throws FileNotFoundException
{
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));

    // Not read afterwards, but the second string is still pushed through the
    // shared pipe before the CRF is constructed.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    PrintStream htmlOut = new PrintStream (new FileOutputStream (htmlFile));
    try {
        LatticeViewer.extraction2html (extraction, extor, htmlOut);
    } finally {
        htmlOut.close ();
    }

    PrintStream latticeOut = new PrintStream (new FileOutputStream (latticeFile));
    try {
        LatticeViewer.extraction2html (extraction, extor, latticeOut, true);
    } finally {
        latticeOut.close ();
    }
}
示例11: setupData
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Loads the training data, and the testing data when a test iterator is
 * configured, into instance lists built on {@code featurePipe}.
 *
 * Each source iterator is wrapped in a {@code PipedIterator} with
 * {@code tokPipe} first. When the matching percentage field is positive the
 * loaded list is replaced by the result of {@code subsetData}. The elapsed
 * time is reported under the label "Data loading".
 */
protected void setupData ()
{
    Timing timing = new Timing ();

    training = new InstanceList (featurePipe);
    training.addThruPipe (new PipedIterator (trainIterator, tokPipe));
    if (trainingPct > 0) {
        training = subsetData (training, trainingPct);
    }

    if (testIterator != null) {
        testing = new InstanceList (featurePipe);
        testing.addThruPipe (new PipedIterator (testIterator, tokPipe));
        if (testingPct > 0) {
            // Size the test subset with testingPct; it was previously passed
            // trainingPct, a copy-paste slip.
            testing = subsetData (testing, testingPct);
        }
    }

    timing.tick ("Data loading");
}
示例12: testTrainStochasticGradient
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF with stochastic gradient for 100 iterations on half of the
 * data and prints token accuracy on both halves before and after training.
 * Makes no assertions.
 */
public void testTrainStochasticGradient() {
    Pipe inputPipe = makeSpacePredictionPipe();
    Pipe outputPipe = new TestCRF2String();

    InstanceList all = new InstanceList(inputPipe);
    all.addThruPipe(new ArrayIterator(data));
    double[] proportions = { .5, .5 };
    InstanceList[] halves = all.split(proportions);
    InstanceList trainHalf = halves[0];
    InstanceList testHalf = halves[1];

    CRF model = new CRF(inputPipe, outputPipe);
    model.addFullyConnectedStatesForLabels();
    model.setWeightsDimensionAsIn(trainHalf, false);
    CRFTrainerByStochasticGradient trainer = new CRFTrainerByStochasticGradient(model, 0.0001);

    double accuracy = model.averageTokenAccuracy(trainHalf);
    System.out.println("Training Accuracy before training = " + accuracy);
    accuracy = model.averageTokenAccuracy(testHalf);
    System.out.println("Testing Accuracy before training = " + accuracy);

    System.out.println("Training...");
    // The learning rate is chosen from the training half here; the alternative
    // would be a fixed rate via setLearningRate.
    trainer.setLearningRateByLikelihood(trainHalf);
    trainer.train(trainHalf, 100);
    model.print();

    accuracy = model.averageTokenAccuracy(trainHalf);
    System.out.println("Training Accuracy after training = " + accuracy);
    accuracy = model.averageTokenAccuracy(testHalf);
    System.out.println("Testing Accuracy after training = " + accuracy);
}
示例13: main
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Reads every file found under the given directory, passes it through an
 * {@code Input2CharSequence} / {@code CharSequenceRemoveHTML} pipe, and
 * prints the resulting data of each instance to standard error.
 *
 * @param args command-line arguments; {@code args[0]} is the directory to
 *             scan. When it is missing, a usage message is printed and the
 *             program exits with status 1.
 */
public static void main(String[] args) {
    // Fail with a readable message, not an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        System.err.println("Expected one argument: the directory containing the HTML files to process");
        System.exit(1);
        return;
    }
    String htmldir = args[0];

    Pipe pipe = new SerialPipes(new Pipe[] {
        new Input2CharSequence(),
        new CharSequenceRemoveHTML() });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

    for (Instance inst : list) {
        System.err.println(inst.getData());
    }
}
示例14: testXis
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Checks that, for every position except the last and for every state, the
 * xi probabilities summed over the state's outgoing transitions equal the
 * state's gamma probability, within 1e-5.
 *
 * The CRF is first trained for 10 iterations so the parameters are not all
 * at their initial values. Only the first instance is examined.
 */
public void testXis() {
    Pipe pipe = makeSpacePredictionPipe();
    InstanceList all = new InstanceList(pipe);
    all.addThruPipe(new ArrayIterator(data));

    CRF model = new CRF(pipe, null);
    model.addFullyConnectedStatesForLabels();
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.train(all, 10);

    Instance first = all.get(0);
    Sequence input = (Sequence) first.getData();
    Sequence target = (Sequence) first.getTarget();
    SumLatticeDefault lattice = new SumLatticeDefault(model, input, target, null, true);

    for (int position = 0; position < lattice.length() - 1; position++) {
        for (int stateIndex = 0; stateIndex < model.numStates(); stateIndex++) {
            Transducer.State source = model.getState(stateIndex);
            Transducer.TransitionIterator transitions = source.transitionIterator(input, position);
            double gamma = lattice.getGammaProbability(position, source);

            // Accumulate xi over every destination reachable from this state.
            double xiTotal = 0;
            while (transitions.hasNext()) {
                Transducer.State destination = transitions.nextState();
                xiTotal += lattice.getXiProbability(position, source, destination);
            }
            assertEquals(gamma, xiTotal, 1e-5);
        }
    }
}
示例15: testSpaceMaximizable
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a MEMM for a single iteration and then runs the generic
 * value-and-gradient check from {@code TestOptimizable} on its optimizable,
 * with the number of components set to 150.
 */
public void testSpaceMaximizable ()
{
    Pipe pipe = makeSpacePredictionPipe ();
    InstanceList trainList = new InstanceList (pipe);
    trainList.addThruPipe (new ArrayIterator (data));

    MEMM model = new MEMM (pipe, null);
    model.addFullyConnectedStatesForLabels ();
    model.addStartState();
    model.setWeightsDimensionAsIn(trainList);

    // One training iteration runs before the optimizable is requested; the
    // original note says it sets the weights dimension and gathers the
    // training sets.
    MEMMTrainer trainer = new MEMMTrainer (model);
    trainer.train (trainList, 1);

    Optimizable.ByGradientValue optimizable = trainer.getOptimizableMEMM(trainList);
    TestOptimizable.setNumComponents (150);
    TestOptimizable.testValueAndGradient (optimizable);
}