Java Pipe.getDataAlphabet方法代码示例

本文整理汇总了Java中cc.mallet.pipe.Pipe.getDataAlphabet方法的典型用法代码示例。如果您正苦于以下问题：Java Pipe.getDataAlphabet方法的具体用法？Java Pipe.getDataAlphabet怎么用？Java Pipe.getDataAlphabet使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.pipe.Pipe的用法示例。

在下文中一共展示了Pipe.getDataAlphabet方法的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: extractIndependentFeaturesHelper

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Builds a Mallet instance holding the independent features of a single instance annotation.
 * <p>
 * A fresh instance is created whose data is an {@code AugmentableFeatureVector} over the
 * data alphabet of {@code pipe}. Every attribute returned by
 * {@code featureInfo.getAttributes()} is then passed to
 * {@code FeatureExtraction.extractFeature} together with that instance. Finally the
 * augmentable vector is replaced by the result of its {@code toFeatureVector()} call.
 * <p>
 * NOTE: this method is static so that it can be used in the
 * CorpusRepresentationMalletSeq class too.
 *
 * @param instanceAnnotation the annotation for which features are extracted
 * @param inputAS the annotation set handed on to the feature extraction
 * @param featureInfo supplies the attributes to extract; the information in this object
 *        may get updated by the extraction
 * @param pipe the pipe whose data alphabet backs the feature vector
 * @return the newly created instance; its target, name and source are {@code null}
 */
static Instance extractIndependentFeaturesHelper(
        Annotation instanceAnnotation,
        AnnotationSet inputAS,
        FeatureInfo featureInfo,
        Pipe pipe) {

  // Instance constructor arguments are, in order: data, target, name, source.
  Instance instance =
          new Instance(new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);

  for (FeatureSpecAttribute attribute : featureInfo.getAttributes()) {
    FeatureExtraction.extractFeature(instance, attribute, inputAS, instanceAnnotation);
  }

  // TODO: the AugmentableFeatureVector is destructively replaced by a FeatureVector here;
  // it is not clear whether this is beneficial - the working assumption is that it is.
  AugmentableFeatureVector collected = (AugmentableFeatureVector) instance.getData();
  instance.setData(collected.toFeatureVector());
  return instance;
}

开发者ID:GateNLP，项目名称:gateplugin-LearningFramework，代码行数:32，代码来源:CorpusRepresentationMalletTarget.java

示例2: CRF

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates a CRF for the given pipes.
 * Both pipes are forwarded to the superclass constructor; the input and
 * output alphabets are then both read from {@code inputPipe}.
 *
 * @param inputPipe pipe providing the data alphabet and the target alphabet
 * @param outputPipe pipe that is only passed on to the superclass here
 */
public CRF (Pipe inputPipe, Pipe outputPipe)
{
	super (inputPipe, outputPipe);

	inputAlphabet = inputPipe.getDataAlphabet();
	// NOTE(review): the output alphabet comes from inputPipe, not outputPipe -
	// presumably intentional; confirm against callers.
	outputAlphabet = inputPipe.getTargetAlphabet();

	// The input alphabet's growth is not stopped here; the stopGrowth() call is left disabled.
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:8，代码来源:CRF.java

示例3: testTokenAccuracy

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains a fully connected CRF on one half of the space-prediction data and
 * checks the token accuracy that the evaluator reports for the other half.
 */
public void testTokenAccuracy() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList allInstances = new InstanceList(pipe);
	allInstances.addThruPipe(new ArrayIterator(data));

	// Fixed seed for the random 50/50 split.
	double[] proportions = { .5, .5 };
	InstanceList[] halves = allInstances.split(new Random(777), proportions);
	InstanceList trainHalf = halves[0];
	InstanceList testHalf = halves[1];

	CRF model = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	model.addFullyConnectedStatesForLabels();

	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	trainer.setUseSparseWeights(true);
	trainer.trainIncremental(trainHalf);

	String[] descriptions = { "Train", "Test" };
	TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
	evaluator.evaluateInstanceList(trainer, testHalf, "Test");

	assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:23，代码来源:TestCRF.java

示例4: PagedInstanceList

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/** Creates a PagedInstanceList whose pages hold "instancesPerPage"
 * instances each and are swapped to disk in directory "swapDir".
 * The swap directory (including missing parent directories) is created
 * if it does not exist yet; a failure to create it is reported on
 * standard error instead of being silently ignored.
 * @param pipe instance pipe
 * @param numPages number of pages to keep in memory
 * @param instancesPerPage number of Instances to store in each page
 * @param swapDir where the pages on disk live.
 */
public PagedInstanceList (Pipe pipe, int numPages, int instancesPerPage, File swapDir) {
    super (pipe, numPages * instancesPerPage);
    this.instancesPerPage = instancesPerPage;
    this.swapDir = swapDir;
    this.inMemoryPageIds = new int[numPages];
    this.inMemoryPages = new InstanceList[numPages];
    // Pass-through pipe built over the same alphabets as the given pipe.
    this.noopPipe = new Noop(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    // Every slot starts with the id -1 (presumably "no page loaded").
    for (int i = 0; i < numPages; i++) {
        this.inMemoryPageIds[i] = -1;
    }

    try {
        // mkdirs() returns false when nothing was created. The extra isDirectory()
        // check avoids a false alarm if the directory appeared in the meantime.
        if (!swapDir.exists() && !swapDir.mkdirs() && !swapDir.isDirectory()) {
            System.err.println ("Could not create directory " + swapDir);
        }
    } catch (SecurityException e) {
        System.err.println ("No permission to make directory " + swapDir);
        System.exit(-1);
    }
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:29，代码来源:PagedInstanceList.java

示例5: testConcatenatePipes

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Concatenates a StupidPipe with a tagger pipe whose data alphabet has
 * already been filled, and checks that the combined pipe exposes the very
 * same alphabet object as the tagger pipe.
 */
public void testConcatenatePipes ()
{
  Pipe stupid = new StupidPipe ();
  Pipe tagger = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence ();
  // Push one instance through the tagger pipe so that its dictionary is initialized.
  tagger.instanceFrom(new Instance (data, null, null, null));

  assertEquals (3, tagger.getDataAlphabet ().size());

  Alphabet combinedDict = PipeUtils.concatenatePipes (stupid, tagger).getDataAlphabet ();

  assertEquals (3, combinedDict.size ());
  // Reference comparison on purpose: it has to be the identical object.
  assertTrue (combinedDict == tagger.getDataAlphabet ());
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:16，代码来源:TestPipeUtils.java

示例6: testConcatenateBadPipes

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Two pipes that each resolved their own, distinct data alphabet must not
 * be concatenable: concatenatePipes() is expected to throw an
 * IllegalArgumentException.
 */
public void testConcatenateBadPipes ()
{
  // Asking a pipe for its data alphabet forces the alphabet to be resolved.
  Pipe first = new SimpleTaggerSentence2TokenSequence ();
  Alphabet firstDict = first.getDataAlphabet ();

  Pipe second = new SimpleTaggerSentence2TokenSequence ();
  Alphabet secondDict = second.getDataAlphabet ();

  assertTrue (firstDict != secondDict);

  boolean rejected;
  try {
    PipeUtils.concatenatePipes (first, second);
    rejected = false;
  } catch (IllegalArgumentException expected) {
    // This is the expected outcome.
    rejected = true;
  }
  assertTrue ("Test failed: concatenatePipes() allowed putting together incompatible alphabets.", rejected);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:21，代码来源:TestPipeUtils.java

示例7: ACRF

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 *  Create a ACRF for a 1-d sequence.  Needs an array
 *   of Templates.
 */
public ACRF (Pipe inputPipe, Template[] tmpls)
  throws IllegalArgumentException
{
  this.inputPipe = inputPipe;
  this.templates = tmpls;
  this.inputAlphabet = inputPipe.getDataAlphabet();
  this.defaultFeatureIndex = inputAlphabet.size ();
  for (int tidx = 0; tidx < templates.length; tidx++) templates [tidx].index = tidx;
}

开发者ID:mimno，项目名称:GRMM，代码行数:14，代码来源:ACRF.java

示例8: resolveAlphabets

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Walks through the pipes in order. Each pipe is first notified of the
 * data (then target) alphabet obtained from the pipe before it, and is then
 * asked for its own. The alphabets returned by the last pipe are stored as
 * this object's data and target alphabets (both stay null without pipes).
 */
private void resolveAlphabets ()
{
	Alphabet previousData = null;
	Alphabet previousTarget = null;

	for (Pipe current : pipes) {
		// Data alphabet: notify first, then query.
		current.preceedingPipeDataAlphabetNotification(previousData);
		previousData = current.getDataAlphabet();

		// Target alphabet: same notify-then-query sequence.
		current.preceedingPipeTargetAlphabetNotification(previousTarget);
		previousTarget = current.getTargetAlphabet();
	}

	this.dataAlphabet = previousData;
	this.targetAlphabet = previousTarget;
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:13，代码来源:SerialPipes.java

示例9: testPipesAreStupid

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Puts a StupidPipe and an already-used token-sequence pipe into a
 * SerialPipes and expects that asking the serial pipe for its data
 * alphabet throws an IllegalStateException.
 */
public void testPipesAreStupid ()
{
  Pipe stupid = new StupidPipe ();
  Pipe tokenizer = new SimpleTaggerSentence2TokenSequence ();
  // Push one instance through the second pipe so that its dictionary is initialized.
  tokenizer.instanceFrom(new Instance (data, null, null, null));

  Pipe[] chain = { stupid, tokenizer };
  Pipe serial = new SerialPipes (chain);

  boolean threw;
  try {
    serial.getDataAlphabet ();
    threw = false;
  } catch (IllegalStateException expected) {
    threw = true;
  }
  assertTrue ("Test failed: Should have generated exception.", threw);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:14，代码来源:TestPipeUtils.java

示例10: HMM

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates an HMM that remembers both pipes and reads its input and output
 * alphabets from {@code inputPipe}.
 *
 * @param inputPipe pipe providing the data alphabet and the target alphabet
 * @param outputPipe pipe that is stored but not queried here
 */
public HMM(Pipe inputPipe, Pipe outputPipe) {
	this.inputPipe = inputPipe;
	this.outputPipe = outputPipe;

	inputAlphabet = inputPipe.getDataAlphabet();
	// NOTE(review): the output alphabet comes from inputPipe, not outputPipe -
	// presumably intentional; confirm against callers.
	outputAlphabet = inputPipe.getTargetAlphabet();
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:7，代码来源:HMM.java

示例11: testAddOrderNStates

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains three CRFs whose states are built by addOrderNStates with
 * different settings on the same training half, then checks that the
 * likelihoods are strictly increasing from the first to the third model
 * and match the recorded reference values.
 */
public void testAddOrderNStates() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList allInstances = new InstanceList(pipe);
	allInstances.addThruPipe(new ArrayIterator(data));
	InstanceList[] halves = allInstances.split(new java.util.Random(678),
			new double[] { .5, .5 });
	// Only the first half is used, for training as well as for the likelihoods.
	InstanceList train = halves[0];

	// Compare 3 CRFs trained with addOrderNStates, and make sure
	// that having more features leads to a higher likelihood

	CRF crfA = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	crfA.addOrderNStates(train, new int[] { 1 }, new boolean[] { false },
			"START", null, null, false);
	CRFTrainerByLabelLikelihood trainerA = new CRFTrainerByLabelLikelihood(crfA);
	trainerA.trainIncremental(train);

	CRF crfB = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	crfB.addOrderNStates(train, new int[] { 1, 2 }, new boolean[] { false, true },
			"START", null, null, false);
	CRFTrainerByLabelLikelihood trainerB = new CRFTrainerByLabelLikelihood(crfB);
	trainerB.trainIncremental(train);

	CRF crfC = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	crfC.addOrderNStates(train, new int[] { 1, 2 }, new boolean[] { false, false },
			"START", null, null, false);
	CRFTrainerByLabelLikelihood trainerC = new CRFTrainerByLabelLikelihood(crfC);
	trainerC.trainIncremental(train);

	// Prevent cached values
	double likA = getLikelihood(crfA, train);
	double likB = getLikelihood(crfB, train);
	double likC = getLikelihood(crfC, train);

	System.out.println("CRF1 likelihood " + likA);

	assertTrue("Final zero-order likelihood <" + likA
			+ "> greater than first-order <" + likB + ">", likA < likB);
	assertTrue("Final defaults-only likelihood <" + likB
			+ "> greater than full first-order <" + likC + ">", likB < likC);

	// Recorded reference values.
	assertEquals(-167.2234457483949, likA, 0.0001);
	assertEquals(-165.81326484466342, likB, 0.0001);
	assertEquals(-90.37680146432787, likC, 0.0001);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:43，代码来源:TestCRF.java

示例12: testFrozenWeights

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains one CRF normally and a second one with every other weight set
 * frozen before training. Verifies that the frozen weights (and the
 * matching default weights) are still zero afterwards and that the frozen
 * model reaches a strictly lower objective value than the unrestricted one.
 */
public void testFrozenWeights() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList trainingData = new InstanceList(pipe);
	trainingData.addThruPipe(new ArrayIterator(data));

	// Reference model: nothing frozen.
	CRF fullCrf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	fullCrf.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood fullTrainer = new CRFTrainerByLabelLikelihood(fullCrf);
	fullTrainer.trainIncremental(trainingData);

	CRF frozenCrf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	frozenCrf.addFullyConnectedStatesForLabels();
	// Freeze the weights at the even indices, before training
	for (int idx = 0; idx < frozenCrf.getWeights().length; idx += 2) {
		frozenCrf.freezeWeights(idx);
	}
	CRFTrainerByLabelLikelihood frozenTrainer = new CRFTrainerByLabelLikelihood(frozenCrf);
	frozenTrainer.trainIncremental(trainingData);

	// The frozen (even-indexed) entries must still be zero after training.
	SparseVector[] weights = frozenCrf.getWeights();
	double[] defaultWeights = frozenCrf.getDefaultWeights();
	for (int idx = 0; idx < weights.length; idx += 2) {
		assertEquals(0.0, defaultWeights[idx], 1e-10);
		SparseVector frozen = weights[idx];
		for (int loc = 0; loc < frozen.numLocations(); loc++) {
			assertEquals(0.0, frozen.valueAtLocation(loc), 1e-10);
		}
	}

	// Check that the frozen weights has worse likelihood
	Optimizable.ByGradientValue fullObjective = fullTrainer.getOptimizableCRF(trainingData);
	Optimizable.ByGradientValue frozenObjective = frozenTrainer.getOptimizableCRF(trainingData);
	double fullValue = fullObjective.getValue();
	double frozenValue = frozenObjective.getValue();
	assertTrue(
			"Error: Freezing weights does not harm log-likelihood!  Full "
					+ fullValue + ", Frozen " + frozenValue, fullValue > frozenValue);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:42，代码来源:TestCRF.java

示例13: disabledtestAddOrderNStates

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * MEMM counterpart of the addOrderNStates comparison: trains three MEMMs
 * with different addOrderNStates settings on the same training half, then
 * checks that the likelihoods are strictly increasing from the first to the
 * third model and match the recorded reference values.
 * The "disabled" prefix in the method name presumably keeps the test runner
 * from picking it up.
 */
public void disabledtestAddOrderNStates ()
{
  Pipe pipe = makeSpacePredictionPipe ();

  InstanceList allInstances = new InstanceList (pipe);
  allInstances.addThruPipe (new ArrayIterator(data));
  InstanceList[] halves = allInstances.split (new java.util.Random (678), new double[]{.5, .5});
  // Only the first half is used, for training as well as for the likelihoods.
  InstanceList train = halves [0];

  // Compare 3 CRFs trained with addOrderNStates, and make sure
  // that having more features leads to a higher likelihood

  MEMM memmA = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  memmA.addOrderNStates (train, new int[] { 1 }, new boolean[] { false },
                         "START", null, null, false);
  memmA.setWeightsDimensionAsIn(train);
  MEMMTrainer trainerA = new MEMMTrainer (memmA);
  trainerA.train(train);

  MEMM memmB = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  memmB.addOrderNStates (train, new int[] { 1, 2 }, new boolean[] { false, true },
                         "START", null, null, false);
  memmB.setWeightsDimensionAsIn(train);
  MEMMTrainer trainerB = new MEMMTrainer (memmB);
  trainerB.train(train);

  MEMM memmC = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  memmC.addOrderNStates (train, new int[] { 1, 2 }, new boolean[] { false, false },
                         "START", null, null, false);
  memmC.setWeightsDimensionAsIn(train);
  MEMMTrainer trainerC = new MEMMTrainer (memmC);
  trainerC.train(train);

  // Prevent cached values
  double likA = getLikelihood (trainerA, train);
  double likB = getLikelihood (trainerB, train);
  double likC = getLikelihood (trainerC, train);

  System.out.println("CRF1 likelihood "+likA);

  assertTrue ("Final zero-order likelihood <"+likA+"> greater than first-order <"+likB+">",
              likA < likB);
  assertTrue ("Final defaults-only likelihood <"+likB+"> greater than full first-order <"+likC+">",
              likB < likC);

  // Recorded reference values.
  assertEquals (-167.335971702, likA, 0.0001);
  assertEquals (-166.212235389, likB, 0.0001);
  assertEquals ( -90.386005741, likC, 0.0001);
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:66，代码来源:TestMEMM.java

示例14: NaiveBayesTrainer

import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates a trainer bound to the given instance pipe and caches the
 * pipe's data and target alphabets.
 *
 * @param instancePipe pipe that supplies both alphabets
 */
public NaiveBayesTrainer (Pipe instancePipe) {
	this.instancePipe = instancePipe;

	// Both alphabets are read from the pipe once, at construction time.
	dataAlphabet = instancePipe.getDataAlphabet();
	targetAlphabet = instancePipe.getTargetAlphabet();
}

开发者ID:kostagiolasn，项目名称:NucleosomePatternClassifier，代码行数:6，代码来源:NaiveBayesTrainer.java

注：本文中的cc.mallet.pipe.Pipe.getDataAlphabet方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。