当前位置: 首页>>代码示例>>Java>>正文


Java Pipe类代码示例

本文整理汇总了Java中cc.mallet.pipe.Pipe的典型用法代码示例。如果您正苦于以下问题:Java Pipe类的具体用法?Java Pipe怎么用?Java Pipe使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


Pipe类属于cc.mallet.pipe包,在下文中一共展示了Pipe类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testSpaceViewer

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains a CRF on the first TestCRF sample, runs extraction on the second
 * sample, and hands the result to DocumentViewer.writeExtraction together
 * with outputDir (created first when it does not exist yet).
 *
 * @throws IOException declared by the original test; propagated unchanged
 */
public void testSpaceViewer () throws IOException
{
  Pipe spacePipe = TestMEMM.makeSpacePredictionPipe ();
  String[] trainText = { TestCRF.data[0] };
  String[] testText = { TestCRF.data[1] };

  InstanceList trainSet = new InstanceList (spacePipe);
  trainSet.addThruPipe (new ArrayIterator (trainText));
  // This list is never read back below; it is still pushed through the pipe.
  InstanceList heldOut = new InstanceList (spacePipe);
  heldOut.addThruPipe (new ArrayIterator (testText));

  CRF model = new CRF (spacePipe, null);
  model.addFullyConnectedStatesForLabels ();
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood (model);
  trainer.trainIncremental (trainSet);

  CRFExtractor extractor = TestLatticeViewer.hackCrfExtor (model);
  Extraction result = extractor.extract (new ArrayIterator (testText));

  if (!outputDir.exists ()) {
    outputDir.mkdir ();
  }
  DocumentViewer.writeExtraction (outputDir, result);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestDocumentViewer.java

示例2: extractIndependentFeaturesHelper

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Builds an instance holding the independent features of a single instance annotation.
 * <p>
 * A new {@code AugmentableFeatureVector} over the pipe's data alphabet is wrapped in an
 * {@code Instance} (target, name and source are all null). {@code FeatureExtraction.extractFeature}
 * is then invoked once per attribute returned by {@code featureInfo.getAttributes()}, and finally
 * the instance data is replaced by the result of {@code toFeatureVector()}.
 * <p>
 * NOTE: this method is static so that it can be used in the CorpusRepresentationMalletSeq class too.
 *
 * @param instanceAnnotation the annotation features are extracted for
 * @param inputAS the annotation set passed to every extraction call
 * @param featureInfo supplies the attributes to extract; per the original documentation its
 *        information gets updated during extraction
 * @param pipe supplies the data alphabet for the feature vector
 * @return the newly created instance
 */
static Instance extractIndependentFeaturesHelper(
        Annotation instanceAnnotation,
        AnnotationSet inputAS,
        FeatureInfo featureInfo,
        Pipe pipe) {

  // Instance constructor arguments, in order: data, target, name, source
  Instance result = new Instance(
          new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);

  for(FeatureSpecAttribute spec : featureInfo.getAttributes()) {
    FeatureExtraction.extractFeature(result, spec, inputAS, instanceAnnotation);
  }

  // TODO: we destructively replace the AugmentableFeatureVector by a FeatureVector here,
  // but it is not clear if this is beneficial - our assumption is that yes.
  AugmentableFeatureVector filled = (AugmentableFeatureVector) result.getData();
  result.setData(filled.toFeatureVector());
  return result;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:32,代码来源:CorpusRepresentationMalletTarget.java

示例3: CRF

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a CRF from an input pipe and an output pipe.
 * Both pipes are forwarded to the superclass constructor; the input and
 * output alphabets of this CRF are both read from <code>inputPipe</code>.
 *
 * @param inputPipe pipe whose data and target alphabets are adopted
 * @param outputPipe only passed on to the superclass constructor
 */
public CRF (Pipe inputPipe, Pipe outputPipe)
{
	super (inputPipe, outputPipe);
	// outputPipe is not consulted here: the target alphabet comes from inputPipe as well.
	inputAlphabet = inputPipe.getDataAlphabet();
	outputAlphabet = inputPipe.getTargetAlphabet();
	// The stopGrowth() call on the input alphabet remains disabled:
	//inputAlphabet.stopGrowth();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:8,代码来源:CRF.java

示例4: testTokenAccuracy

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Splits the test data into two halves with a fixed random seed, trains a
 * CRF with sparse weights on the first half, and asserts the "Test"
 * accuracy reported by a TokenAccuracyEvaluator for the second half.
 */
public void testTokenAccuracy() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList all = new InstanceList(pipe);
	all.addThruPipe(new ArrayIterator(data));
	double[] proportions = { .5, .5 };
	InstanceList[] halves = all.split(new Random(777), proportions);

	CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	crf.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
	trainer.setUseSparseWeights(true);
	trainer.trainIncremental(halves[0]);

	String[] descriptions = { "Train", "Test" };
	TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
	evaluator.evaluateInstanceList(trainer, halves[1], "Test");

	assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestCRF.java

示例5: testSpaceSerializable

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Trains a MEMM for 10 iterations on the test data, clones it via
 * serialization, and asserts that two optimizables obtained from the
 * trainer report the same value (tolerance 1e-5).
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public void testSpaceSerializable () throws IOException, ClassNotFoundException
{
  Pipe pipe = makeSpacePredictionPipe ();
  InstanceList trainSet = new InstanceList (pipe);
  trainSet.addThruPipe (new ArrayIterator (data));

  MEMM model = new MEMM (pipe, null);
  model.addFullyConnectedStatesForLabels ();
  model.addStartState();
  model.setWeightsDimensionAsIn(trainSet);
  MEMMTrainer trainer = new MEMMTrainer (model);
  trainer.train (trainSet, 10);

  // NOTE(review): the clone is never read afterwards, and both values below are
  // obtained from the same trainer, so the final comparison does not involve the
  // deserialized model. Only the round trip itself is exercised - confirm intent.
  MEMM clonedModel = (MEMM) TestSerializable.cloneViaSerialization (model);

  double firstValue = trainer.getOptimizableMEMM(trainSet).getValue ();
  double secondValue = trainer.getOptimizableMEMM(trainSet).getValue ();

  assertEquals (firstValue, secondValue, 1e-5);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestMEMM.java

示例6: makeSpacePredictionPipe

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Assembles the serial pipe used by the tests in this section: tokenization
 * with the pattern ".", lowercasing, space removal, token text features,
 * offset conjunctions, and conversion to a feature vector sequence.
 *
 * @return a new SerialPipes instance wrapping the stages above, in that order
 */
public static  Pipe makeSpacePredictionPipe ()
{
  // Offset groups handed to OffsetConjunctions.
  int[][] offsets = {
    {1}, {-1, 0}, {0, 1},
  };

  Pipe[] stages = {
    new CharSequence2TokenSequence("."),
    new TokenSequenceLowercase(),
    new TestMEMMTokenSequenceRemoveSpaces(),
    new TokenText(),
    new OffsetConjunctions(true, offsets),
    new TokenSequence2FeatureVectorSequence()
  };
  return new SerialPipes(stages);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:TestMEMM.java

示例7: InstanceList

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a list consisting of randomly-generated
 * <code>FeatureVector</code>s.
 * <p>
 * The list is first set up with a serial pipe (token sequence to feature
 * sequence, feature sequence to feature vector, target to label) and is then
 * filled by passing a RandomTokenSequenceIterator through that pipe.
 *
 * @param r the generator of all random-ness used here
 * @param classCentroidDistribution includes a Alphabet
 * @param classCentroidAverageAlphaMean Gaussian mean on the sum of alphas
 * @param classCentroidAverageAlphaVariance Gaussian variance on the sum of alphas
 * @param featureVectorSizePoissonLambda forwarded unchanged to the random iterator
 * @param classInstanceCountPoissonLambda forwarded unchanged to the random iterator
 * @param classNames forwarded unchanged to the random iterator
 */
// xxx Perhaps split these out into a utility class
public InstanceList (Randoms r,
                     Dirichlet classCentroidDistribution,
                     double classCentroidAverageAlphaMean,
                     double classCentroidAverageAlphaVariance,
                     double featureVectorSizePoissonLambda,
                     double classInstanceCountPoissonLambda,
                     String[] classNames)
{
	this (new SerialPipes (new Pipe[] {
			new TokenSequence2FeatureSequence (),
			new FeatureSequence2FeatureVector (),
			new Target2Label()
	}));

	Iterator<Instance> randomInstances = new RandomTokenSequenceIterator (
			r,
			classCentroidDistribution,
			classCentroidAverageAlphaMean,
			classCentroidAverageAlphaVariance,
			featureVectorSizePoissonLambda,
			classInstanceCountPoissonLambda,
			classNames);
	addThruPipe (randomInstances);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:30,代码来源:InstanceList.java

示例8: MaxEnt

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Creates a MaxEnt classifier over the given pipe.
 *
 * @param dataPipe pipe handed to the superclass; its data alphabet size
 *        becomes the default feature index
 * @param parameters parameter array, stored as-is without copying; when
 *        null, a new zero-filled array of length
 *        <code>getNumParameters(dataPipe)</code> is allocated instead
 * @param featureSelection stored as given; asserted to be null whenever
 *        <code>perClassFeatureSelection</code> is non-null
 * @param perClassFeatureSelection stored as given; asserted to be null
 *        whenever <code>featureSelection</code> is non-null
 */
public MaxEnt (Pipe dataPipe,
			double[] parameters,
			FeatureSelection featureSelection,
			FeatureSelection[] perClassFeatureSelection)
	{
		super (dataPipe);
		// At most one of the two selection arguments may be supplied.
		assert (featureSelection == null || perClassFeatureSelection == null);
		this.parameters = (parameters != null)
				? parameters
				: new double[getNumParameters(dataPipe)];
		this.featureSelection = featureSelection;
		this.perClassFeatureSelection = perClassFeatureSelection;
		this.defaultFeatureIndex = dataPipe.getDataAlphabet().size();
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:MaxEnt.java

示例9: testMultiTagSerialization

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Builds a pipe containing an OffsetFeatureConjunction named "time", clones
 * it via serialization, and checks that the clone still produces the "time"
 * feature on tokens 3 and 4 of the first instance built from doc1.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException
{
  Pipe[] stages = {
      new SimpleTaggerSentence2TokenSequence (),
      new TokenText (),
      new RegexMatches ("digits", Pattern.compile ("[0-9]+")),
      new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
      new OffsetFeatureConjunction ("time",
              new String[] { "digits", "ampm" },
              new int[] { 0, 1 },
              true),
      new PrintInputAndTarget (),
  };
  Pipe original = new SerialPipes (stages);

  // Everything below runs against the deserialized copy, not the original pipe.
  Pipe copy = (Pipe) TestSerializable.cloneViaSerialization (original);
  InstanceList instances = new InstanceList (copy);
  instances.addThruPipe (new ArrayIterator (doc1));

  TokenSequence tokens = (TokenSequence) instances.get (0).getData ();
  assertEquals (6, tokens.size ());
  for (int idx = 3; idx <= 4; idx++) {
    assertEquals (1.0, tokens.get (idx).getFeatureValue ("time"), 1e-15);
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:TestOffsetFeatureConjunctions.java

示例10: concatenatePipes

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Chains two pipes into a SerialPipes and installs the combined alphabets on it.
 * The combined data and target alphabets are computed before the serial pipe
 * is constructed. Each alphabet is assigned even when it is null, but the
 * matching "resolved" flag is only set to true for a non-null alphabet.
 *
 * @param p1 first pipe of the chain
 * @param p2 second pipe of the chain
 * @return the new serial pipe
 */
public static Pipe concatenatePipes (Pipe p1, Pipe p2)
{
  Alphabet mergedData = combinedDataDicts (p1, p2);
  Alphabet mergedTarget = combinedTargetDicts (p1, p2);
  Pipe combined = new SerialPipes (new Pipe[] { p1, p2 });

  combined.dataAlphabet = mergedData;
  if (mergedData != null) {
    combined.dataAlphabetResolved = true;
  }

  combined.targetAlphabet = mergedTarget;
  if (mergedTarget != null) {
    combined.targetAlphabetResolved = true;
  }

  return combined;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:PipeUtils.java

示例11: testConcatenatePipes

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Checks that the pipe returned by PipeUtils.concatenatePipes exposes the
 * very same data alphabet object as the second pipe, with 3 entries.
 */
public void testConcatenatePipes ()
{
  Pipe first = new StupidPipe ();
  Pipe second = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence ();
  // Push one instance through the second pipe so its dictionary is initialized.
  second.instanceFrom(new Instance (data, null, null, null));
  assertEquals (3, second.getDataAlphabet ().size());

  Pipe chained = PipeUtils.concatenatePipes (first, second);
  Alphabet chainedDict = chained.getDataAlphabet ();

  assertEquals (3, chainedDict.size ());
  // Identity comparison: the alphabet must be shared, not merely equal.
  assertTrue (chainedDict == second.getDataAlphabet ());
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:16,代码来源:TestPipeUtils.java

示例12: testParenGroupIterator

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Feeds a string with nested and multi-line parenthesized groups through a
 * ParenGroupIterator and checks the three instances that end up in the list.
 */
public void testParenGroupIterator ()
{
	Reader source = new StringReader ("(a (b c) ((d))  ) f\n\n (3\n 4) (  6) ");
	ParenGroupIterator groupIter = new ParenGroupIterator (source);

	Pipe noop = new Noop();
	noop.setTargetProcessing (false);

	InstanceList groups = new InstanceList (noop);
	groups.addThruPipe (groupIter);

	// Expected instance data, in list order.
	String[] expected = {
			"(a (b c) ((d))  )",
			"(3\n 4)",
			"(  6)",
	};
	assertEquals (3, groups.size());
	for (int i = 0; i < expected.length; i++) {
		assertEquals (expected[i], groups.get(i).getData());
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:TestIterators.java

示例13: main

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Command-line entry point: reads a training file and a testing file, trains
 * an ACRF with four templates, and writes the model as a gzipped object.
 *
 * @param args args[0] = training file, args[1] = testing file,
 *        args[2] = file the ACRF is written to
 * @throws FileNotFoundException if opening the training or testing file fails
 */
public static void main (String[] args) throws FileNotFoundException
{
  File trainFile = new File (args[0]);
  File testFile = new File (args[1]);
  File crfFile = new File (args[2]);

  Pipe[] stages = {
      new GenericAcrfData2TokenSequence (2),
      new TokenSequence2FeatureVectorSequence (true, true),
  };
  Pipe pipe = new SerialPipes (stages);

  // Same pattern and flag for both input files.
  // NOTE(review): neither FileReader opened below is closed in this method.
  Pattern linePattern = Pattern.compile ("\\s*");

  InstanceList training = new InstanceList (pipe);
  training.addThruPipe (
      new LineGroupIterator (new FileReader (trainFile), linePattern, true));

  InstanceList testing = new InstanceList (pipe);
  testing.addThruPipe (
      new LineGroupIterator (new FileReader (testFile), linePattern, true));

  ACRF.Template[] templates = {
      new ACRF.BigramTemplate (0),
      new ACRF.BigramTemplate (1),
      new ACRF.PairwiseFactorTemplate (0,1),
      new CrossTemplate1 (0,1)
  };
  ACRF acrf = new ACRF (pipe, templates);

  ACRFTrainer trainer = new DefaultAcrfTrainer ();
  trainer.train (acrf, training, null, testing, 99999);

  FileUtils.writeGzippedObject (crfFile, acrf);
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:36,代码来源:SimpleCrfExample.java

示例14: ACRF

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 *  Create a ACRF for a 1-d sequence.  Needs an array
 *   of Templates.
 */
public ACRF (Pipe inputPipe, Template[] tmpls)
  throws IllegalArgumentException
{
  this.inputPipe = inputPipe;
  this.templates = tmpls;
  this.inputAlphabet = inputPipe.getDataAlphabet();
  this.defaultFeatureIndex = inputAlphabet.size ();
  for (int tidx = 0; tidx < templates.length; tidx++) templates [tidx].index = tidx;
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:14,代码来源:ACRF.java

示例15: setPipes

import cc.mallet.pipe.Pipe; //导入依赖的package包/类
/**
 * Sets the tokenization pipe and the feature pipe of this trainer.
 * The feature pipe is stored as the concatenation of a new
 * RememberTokenizationPipe followed by the pipe passed in.
 *
 * @param tokPipe stored unchanged
 * @param featurePipe appended after a new RememberTokenizationPipe
 * @return this trainer, so that calls can be chained
 */
public ACRFExtractorTrainer setPipes (Pipe tokPipe, Pipe featurePipe)
{
  // Assigned first, as before: tokPipe stays untouched if the concatenation throws.
  this.featurePipe =
      PipeUtils.concatenatePipes (new RememberTokenizationPipe (), featurePipe);
  this.tokPipe = tokPipe;
  return this;
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:8,代码来源:ACRFExtractorTrainer.java


注:本文中的cc.mallet.pipe.Pipe类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。