当前位置: 首页>>代码示例>>Java>>正文


Java InstanceList.addThruPipe方法代码示例

本文整理汇总了Java中cc.mallet.types.InstanceList.addThruPipe方法的典型用法代码示例。如果您正苦于以下问题:Java InstanceList.addThruPipe方法的具体用法?Java InstanceList.addThruPipe怎么用?Java InstanceList.addThruPipe使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.InstanceList的用法示例。


在下文中一共展示了InstanceList.addThruPipe方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testSpaceViewer

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF on {@code TestCRF.data[0]}, runs extraction over
 * {@code TestCRF.data[1]}, and hands the result to
 * {@code DocumentViewer.writeExtraction} together with {@code outputDir}.
 *
 * @throws IOException declared for the viewer output step
 */
public void testSpaceViewer () throws IOException
{
  Pipe spacePipe = TestMEMM.makeSpacePredictionPipe ();
  String[] trainStrings = { TestCRF.data[0] };
  String[] testStrings = { TestCRF.data[1] };

  // Both lists are pushed through the same pipe, training data first.
  InstanceList trainList = new InstanceList (spacePipe);
  trainList.addThruPipe (new ArrayIterator (trainStrings));
  InstanceList testList = new InstanceList (spacePipe);
  testList.addThruPipe (new ArrayIterator (testStrings));

  CRF model = new CRF (spacePipe, null);
  model.addFullyConnectedStatesForLabels ();
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood (model);
  trainer.trainIncremental (trainList);

  CRFExtractor extractor = TestLatticeViewer.hackCrfExtor (model);
  Extraction result = extractor.extract (new ArrayIterator (testStrings));

  // Create the output directory on first use.
  boolean directoryMissing = !outputDir.exists ();
  if (directoryMissing) {
    outputDir.mkdir ();
  }
  DocumentViewer.writeExtraction (outputDir, result);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestDocumentViewer.java

示例2: testParenGroupIterator

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Feeds a string containing nested and multi-line parenthesized groups to a
 * {@code ParenGroupIterator} and checks that exactly three instances come
 * out, each holding the text of one top-level group.
 */
public void testParenGroupIterator ()
{
	String text = "(a (b c) ((d))  ) f\n\n (3\n 4) (  6) ";
	ParenGroupIterator groupIterator = new ParenGroupIterator (new StringReader (text));

	// A no-op pipe with target processing switched off.
	Pipe noop = new Noop();
	noop.setTargetProcessing (false);

	InstanceList groups = new InstanceList (noop);
	groups.addThruPipe (groupIterator);

	assertEquals (3, groups.size());
	String[] expectedGroups = { "(a (b c) ((d))  )", "(3\n 4)", "(  6)" };
	for (int g = 0; g < expectedGroups.length; g++) {
		assertEquals (expectedGroups[g], groups.get(g).getData());
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:TestIterators.java

示例3: testTokenAccuracy

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Splits the piped data 50/50 with a fixed random seed, trains a CRF with
 * sparse weights on the first half, and asserts the token accuracy that
 * {@code TokenAccuracyEvaluator} reports for the second half.
 */
public void testTokenAccuracy() {
	Pipe spacePipe = makeSpacePredictionPipe();

	InstanceList allInstances = new InstanceList(spacePipe);
	allInstances.addThruPipe(new ArrayIterator(data));

	// Fixed seed keeps the split, and hence the expected accuracy, reproducible.
	InstanceList[] halves = allInstances.split(new Random(777), new double[] { .5, .5 });
	InstanceList trainHalf = halves[0];
	InstanceList testHalf = halves[1];

	CRF model = new CRF(spacePipe.getDataAlphabet(), spacePipe.getTargetAlphabet());
	model.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	trainer.setUseSparseWeights(true);
	trainer.trainIncremental(trainHalf);

	String[] listNames = { "Train", "Test" };
	TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, listNames);
	evaluator.evaluateInstanceList(trainer, testHalf, "Test");

	double testAccuracy = evaluator.getAccuracy("Test");
	assertEquals(0.9409, testAccuracy, 0.001);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestCRF.java

示例4: testPrint

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a CRF over the single string "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and prints the CRF.
 */
public void testPrint() {
	Pipe[] stages = {
			new CharSequence2TokenSequence("."),
			new TokenText(),
			new TestCRFTokenSequenceRemoveSpaces(),
			new TokenSequence2FeatureVectorSequence(),
			new PrintInputAndTarget(),
	};
	Pipe p = new SerialPipes(stages);

	InstanceList single = new InstanceList(p);
	single.addThruPipe(new ArrayIterator(new String[] { "ABCDE" }));

	CRF crf = new CRF(p, null);
	crf.addFullyConnectedStatesForThreeQuarterLabels(single);
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
	crf.setWeightsDimensionAsIn(single, false);
	Optimizable optimizable = trainer.getOptimizableCRF(single);

	// Give every parameter a distinct value: its own index.
	double[] values = new double[optimizable.getNumParameters()];
	int k = 0;
	while (k < values.length) {
		values[k] = k;
		k++;
	}
	optimizable.setParameters(values);
	crf.print();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:TestCRF.java

示例5: testSpaceSerializable

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Checks that a trained MEMM keeps its objective value across a
 * serialization round trip.
 *
 * <p>The model is trained for 10 iterations and cloned via
 * {@code TestSerializable.cloneViaSerialization}. The optimizable value is
 * then computed once for the original and once for the clone, and the two
 * must agree to within 1e-5.
 *
 * @throws IOException            if serializing the model fails
 * @throws ClassNotFoundException if deserializing the model fails
 */
public void testSpaceSerializable () throws IOException, ClassNotFoundException
{
  Pipe p = makeSpacePredictionPipe ();
  InstanceList training = new InstanceList (p);
  training.addThruPipe (new ArrayIterator (data));

  MEMM memm = new MEMM (p, null);
  memm.addFullyConnectedStatesForLabels ();
  memm.addStartState();
  memm.setWeightsDimensionAsIn(training);
  MEMMTrainer memmt = new MEMMTrainer (memm);
  memmt.train (training, 10);

  MEMM memm2 = (MEMM) TestSerializable.cloneViaSerialization (memm);

  Optimizable.ByGradientValue mcrf1 = memmt.getOptimizableMEMM(training);
  double val1 = mcrf1.getValue ();

  // Evaluate the deserialized copy. The previous code built the second
  // optimizable from the original model again, so the clone was never
  // exercised and the assertion compared the model with itself.
  MEMMTrainer memmt2 = new MEMMTrainer (memm2);
  Optimizable.ByGradientValue mcrf2 = memmt2.getOptimizableMEMM(training);
  double val2 = mcrf2.getValue ();

  assertEquals (val1, val2, 1e-5);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestMEMM.java

示例6: testMultiTagSerialization

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Serializes and deserializes a pipe chain containing an
 * {@code OffsetFeatureConjunction} named "time", runs {@code doc1} through
 * the restored chain, and checks the token count and the "time" feature on
 * tokens 3 and 4 of the first instance.
 *
 * @throws IOException            if serializing the pipe fails
 * @throws ClassNotFoundException if deserializing the pipe fails
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
{
  Pattern digitsRegex = Pattern.compile ("[0-9]+");
  Pattern ampmRegex = Pattern.compile ("[aApP][mM]");
  String[] conjoinedFeatures = { "digits", "ampm" };
  int[] conjoinedOffsets = { 0, 1 };

  Pipe[] stages = {
      new SimpleTaggerSentence2TokenSequence (),
      new TokenText (),
      new RegexMatches ("digits", digitsRegex),
      new RegexMatches ("ampm", ampmRegex),
      new OffsetFeatureConjunction ("time", conjoinedFeatures, conjoinedOffsets, true),
      new PrintInputAndTarget (),
  };
  Pipe original = new SerialPipes (stages);

  // Only the round-tripped copy is used from here on.
  Pipe restored = (Pipe) TestSerializable.cloneViaSerialization (original);
  InstanceList instances = new InstanceList (restored);
  instances.addThruPipe (new ArrayIterator (doc1));

  TokenSequence tokens = (TokenSequence) instances.get (0).getData ();
  assertEquals (6, tokens.size ());
  for (int idx = 3; idx <= 4; idx++) {
    assertEquals (1.0, tokens.get (idx).getFeatureValue ("time"), 1e-15);
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:TestOffsetFeatureConjunctions.java

示例7: disabledtestPrint

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a MEMM over the single string "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and prints the model.
 */
public void disabledtestPrint ()
{
	Pipe[] stages = {
		new CharSequence2TokenSequence("."),
		new TokenText(),
		new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
		new TokenSequence2FeatureVectorSequence(),
		new PrintInputAndTarget(),
	};
	Pipe p = new SerialPipes (stages);

	InstanceList single = new InstanceList (p);
	single.addThruPipe (new ArrayIterator (new String[] { "ABCDE" }));

	MEMM model = new MEMM (p, null);
	model.addFullyConnectedStatesForLabels();
	model.setWeightsDimensionAsIn (single);
	MEMMTrainer trainer = new MEMMTrainer (model);
	MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(single);

	// Give every parameter a distinct value: its own index.
	double[] values = new double[optimizable.getNumParameters()];
	int k = 0;
	while (k < values.length) {
		values [k] = k;
		k++;
	}
	optimizable.setParameters (values);
	model.print ();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:25,代码来源:TestMEMM.java

示例8: createExtractionFrom

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds an {@code Extraction} by piping the predicted and the true strings
 * through the same SGML pipe and pairing them up by index.
 *
 * <p>One {@code DocumentExtraction} named {@code "TEST" + i} is added for
 * each predicted instance.
 *
 * @param predStrings strings whose piped targets become the predicted sequences
 * @param trueStrings strings whose piped targets become the true sequences;
 *                    indexed in parallel with {@code predStrings}
 * @return the extraction holding one document per predicted instance
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
  CharSequenceLexer lexer = new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES);
  Pipe[] stages = {
    new SGML2TokenSequence (lexer, "O"),
    new Target2LabelSequence (),
    new PrintInputAndTarget (),
  };
  Pipe pipe = new SerialPipes (stages);

  // Predictions go through the pipe before the true labels.
  InstanceList predictedList = new InstanceList (pipe);
  predictedList.addThruPipe (new ArrayIterator (predStrings));

  InstanceList trueList = new InstanceList (pipe);
  trueList.addThruPipe (new ArrayIterator (trueStrings));

  LabelAlphabet dict = (LabelAlphabet) pipe.getTargetAlphabet ();
  Extraction extraction = new Extraction (null, dict);

  int docCount = predictedList.size();
  for (int doc = 0; doc < docCount; doc++) {
    Instance predicted = predictedList.get (doc);
    Instance expected = trueList.get (doc);
    extraction.addDocumentExtraction (new DocumentExtraction (
        "TEST"+doc,
        dict,
        (Tokenization) predicted.getData (),
        (Sequence) predicted.getTarget (),
        (Sequence) expected.getTarget (),
        "O"));
  }

  return extraction;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:30,代码来源:TestPerDocumentF1Evaluator.java

示例9: annotate

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Annotates each layout line with the CRF's gamma probability for every
 * state visited by the inner loop.
 *
 * <p>The lines are joined with the platform line separator, read through a
 * {@code LineGroupIterator} into an {@code InstanceList} built on
 * {@code this.pipe}, and evaluated with a {@code SumLatticeDefault} over
 * {@code this.crf}. The text stored in each annotation is the part of the
 * line before its first tab character.
 *
 * @param linesWithLayout the input lines; only the first tab-separated field
 *                        of each is kept in the annotation
 * @return one annotation per position of every piped instance, in order
 * @throws IOException           if closing the in-memory reader fails
 * @throws AnalysisException     declared by the signature; not thrown directly here
 * @throws IllegalStateException if an instance's sequence length differs from
 *                               the number of input lines
 */
public List<ReferenceLineAnnotation> annotate(List<String> linesWithLayout) throws IOException, AnalysisException {

        StringBuilder lineStringBuilder = new StringBuilder();
        for (String line : linesWithLayout) {
            lineStringBuilder.append(line).append(System.lineSeparator());
        }

        InstanceList inputInstances = new InstanceList(this.pipe);
        // try-with-resources closes the reader even when the pipe throws;
        // previously close() was skipped on that path.
        try (BufferedReader lineReader = new BufferedReader(new StringReader(lineStringBuilder.toString()))) {
            inputInstances.addThruPipe(new LineGroupIterator(lineReader, Pattern.compile("^\\s*$"), true));
        }

        List<ReferenceLineAnnotation> referenceLineAnnotations = new ArrayList<ReferenceLineAnnotation>();

        for (Instance instance : inputInstances) {
            @SuppressWarnings("unchecked")
            Sequence<String> inputSequence = (Sequence<String>) instance.getData();
            SumLatticeDefault latticeDefault = new SumLatticeDefault(this.crf, inputSequence);
            Alphabet outputAlphabet = this.crf.getOutputAlphabet();
            if (linesWithLayout.size() != inputSequence.size()) {
                throw new IllegalStateException("linesWithLayout.size()!=inputSequence.size()");
            }
            for (int i = 0; i < inputSequence.size(); i++) {
                ReferenceLineAnnotation referenceLineAnnotation = new ReferenceLineAnnotation(
                        linesWithLayout.get(i).split("\\t")[0]);
                // NOTE(review): states are read from index 1 through
                // outputAlphabet.size() inclusive, presumably skipping a start
                // state at index 0 - confirm against how the CRF was built.
                for (int j = 1; j <= outputAlphabet.size(); j++) {
                    State state = this.crf.getState(j);
                    // NOTE(review): lattice position i + 1 is used for line i;
                    // presumably position 0 is the initial position - confirm.
                    referenceLineAnnotation.addAnnotation(state.getName(),
                            latticeDefault.getGammaProbability(i + 1, state));
                }
                referenceLineAnnotations.add(referenceLineAnnotation);
            }
        }
        return referenceLineAnnotations;
    }
 
开发者ID:exciteproject,项目名称:refext,代码行数:36,代码来源:ReferenceLineAnnotator.java

示例10: testSpaceViewer

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF on {@code TestCRF.data[0]}, extracts from
 * {@code TestCRF.data[1]}, and writes two HTML renderings of the result: one
 * to {@code htmlFile} and one, with the extra boolean flag set, to
 * {@code latticeFile}.
 *
 * @throws FileNotFoundException if either output file cannot be opened
 */
public void testSpaceViewer () throws FileNotFoundException
{
  Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
  String[] data0 = { TestCRF.data[0] };
  String[] data1 = { TestCRF.data[1] };

  InstanceList training = new InstanceList (pipe);
  training.addThruPipe (new ArrayIterator (data0));
  // Not read afterwards, but kept: it still pushes data1 through the shared pipe.
  InstanceList testing = new InstanceList (pipe);
  testing.addThruPipe (new ArrayIterator (data1));

  CRF crf = new CRF (pipe, null);
  crf.addFullyConnectedStatesForLabels ();
  CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
  crft.trainIncremental (training);

  CRFExtractor extor = hackCrfExtor (crf);
  Extraction extraction = extor.extract (new ArrayIterator (data1));

  // Close each stream in a finally block so a failure while rendering
  // does not leak the file handle.
  PrintStream out = new PrintStream (new FileOutputStream (htmlFile));
  try {
    LatticeViewer.extraction2html (extraction, extor, out);
  } finally {
    out.close();
  }

  out = new PrintStream (new FileOutputStream (latticeFile));
  try {
    LatticeViewer.extraction2html (extraction, extor, out, true);
  } finally {
    out.close();
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:30,代码来源:TestLatticeViewer.java

示例11: setupData

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Loads the training data and, when a test iterator is configured, the
 * testing data.
 *
 * <p>Each source iterator is wrapped in a {@code PipedIterator} with
 * {@code tokPipe} and added to an {@code InstanceList} built on
 * {@code featurePipe}. When {@code trainingPct} or {@code testingPct} is
 * positive, the corresponding list is replaced by the result of
 * {@code subsetData} called with that percentage.
 */
protected void setupData ()
{
  Timing timing = new Timing ();
  training = new InstanceList (featurePipe);
  training.addThruPipe (new PipedIterator (trainIterator, tokPipe));
  if (trainingPct > 0) {
    training = subsetData (training, trainingPct);
  }

  if (testIterator != null) {
    testing = new InstanceList (featurePipe);
    testing.addThruPipe (new PipedIterator (testIterator, tokPipe));
    if (testingPct > 0) {
      // Use the testing percentage; the previous code passed trainingPct
      // here, so the testing subset was sized by the wrong setting.
      testing = subsetData (testing, testingPct);
    }
  }

  timing.tick ("Data loading");
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:16,代码来源:ACRFExtractorTrainer.java

示例12: testTrainStochasticGradient

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a CRF with {@code CRFTrainerByStochasticGradient} on one half of
 * the data and prints the average token accuracy on both halves before and
 * after training.
 */
public void testTrainStochasticGradient() {
	Pipe inputPipe = makeSpacePredictionPipe();
	Pipe outputPipe = new TestCRF2String();

	InstanceList allInstances = new InstanceList(inputPipe);
	allInstances.addThruPipe(new ArrayIterator(data));
	InstanceList[] halves = allInstances.split(new double[] { .5, .5 });
	InstanceList trainHalf = halves[0];
	InstanceList testHalf = halves[1];

	CRF model = new CRF(inputPipe, outputPipe);
	model.addFullyConnectedStatesForLabels();
	model.setWeightsDimensionAsIn(trainHalf, false);
	CRFTrainerByStochasticGradient trainer =
			new CRFTrainerByStochasticGradient(model, 0.0001);

	System.out.println("Training Accuracy before training = " + model.averageTokenAccuracy(trainHalf));
	System.out.println("Testing  Accuracy before training = " + model.averageTokenAccuracy(testHalf));
	System.out.println("Training...");

	// The learning rate is chosen from the training sample here; a fixed
	// rate could be set with setLearningRate instead.
	trainer.setLearningRateByLikelihood(trainHalf);
	trainer.train(trainHalf, 100);
	model.print();

	System.out.println("Training Accuracy after training = " + model.averageTokenAccuracy(trainHalf));
	System.out.println("Testing  Accuracy after training = " + model.averageTokenAccuracy(testHalf));
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:TestCRF.java

示例13: main

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Reads every file found under the given starting directory, runs it through
 * {@code Input2CharSequence} and {@code CharSequenceRemoveHTML}, and prints
 * the resulting data of each instance to standard error.
 *
 * @param args {@code args[0]} is the directory to read from
 * @throws IllegalArgumentException if no directory argument is supplied
 */
public static void main(String[] args) {
	// Fail with a clear message instead of an ArrayIndexOutOfBoundsException.
	if (args == null || args.length < 1) {
		throw new IllegalArgumentException(
				"Usage: CharSequenceRemoveHTML <html-directory>");
	}
	String htmldir = args[0];
	Pipe pipe = new SerialPipes(new Pipe[] { new Input2CharSequence(),
			new CharSequenceRemoveHTML() });
	InstanceList list = new InstanceList(pipe);
	list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

	for (int index = 0; index < list.size(); index++) {
		Instance inst = list.get(index);
		System.err.println(inst.getData());
	}

}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:CharSequenceRemoveHTML.java

示例14: testXis

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * After a short training run, checks on the first instance that for every
 * lattice position except the last and for every state, the xi
 * probabilities over the state's transitions sum to the state's gamma
 * probability, to within 1e-5.
 */
public void testXis() {
	Pipe spacePipe = makeSpacePredictionPipe();

	InstanceList allInstances = new InstanceList(spacePipe);
	allInstances.addThruPipe(new ArrayIterator(data));

	CRF model = new CRF(spacePipe, null);
	model.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	// A few iterations are enough to get non-trivial parameters.
	trainer.train(allInstances, 10);

	Instance first = allInstances.get(0);
	Sequence tokens = (Sequence) first.getData();
	Sequence labels = (Sequence) first.getTarget();
	SumLatticeDefault lattice = new SumLatticeDefault(model, tokens, labels, null, true);

	for (int pos = 0; pos < lattice.length() - 1; pos++) {
		for (int s = 0; s < model.numStates(); s++) {
			Transducer.State source = model.getState(s);
			Transducer.TransitionIterator outgoing = source.transitionIterator(tokens, pos);
			double expected = lattice.getGammaProbability(pos, source);

			// Accumulate xi over every destination reachable from this state.
			double total = 0;
			while (outgoing.hasNext()) {
				total += lattice.getXiProbability(pos, source, outgoing.nextState());
			}
			assertEquals(expected, total, 1e-5);
		}
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:33,代码来源:TestCRF.java

示例15: testSpaceMaximizable

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a MEMM for one iteration and then runs
 * {@code TestOptimizable.testValueAndGradient} on its optimizable, with the
 * number of components set to 150.
 */
public void testSpaceMaximizable ()
  {
    Pipe spacePipe = makeSpacePredictionPipe ();
    InstanceList trainList = new InstanceList (spacePipe);
    trainList.addThruPipe (new ArrayIterator (data));

    MEMM model = new MEMM (spacePipe, null);
    model.addFullyConnectedStatesForLabels ();
    model.addStartState();
    model.setWeightsDimensionAsIn(trainList);

    // One training iteration sets the weights dimension and gathers the
    // per-instance training sets needed below.
    MEMMTrainer trainer = new MEMMTrainer (model);
    trainer.train (trainList, 1);

    Optimizable.ByGradientValue optimizable = trainer.getOptimizableMEMM(trainList);
    TestOptimizable.setNumComponents (150);
    TestOptimizable.testValueAndGradient (optimizable);
  }
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:TestMEMM.java


注:本文中的cc.mallet.types.InstanceList.addThruPipe方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。