本文整理汇总了Java中cc.mallet.pipe.Pipe.getDataAlphabet方法的典型用法代码示例。如果您正苦于以下问题：Java Pipe.getDataAlphabet方法的具体用法？Java Pipe.getDataAlphabet怎么用？Java Pipe.getDataAlphabet使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.pipe.Pipe的用法示例。
在下文中一共展示了Pipe.getDataAlphabet方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractIndependentFeaturesHelper
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Builds an instance holding the independent features of a single instance annotation.
 * <p>
 * Each attribute listed by {@code featureInfo} is extracted for the annotation and stored in
 * a feature vector backed by the data alphabet of {@code pipe}. According to the original
 * notes, the information in the featureInfo object gets updated by the extraction.
 * <p>
 * NOTE: this method is static so that it can be used in the
 * CorpusRepresentationMalletSeq class too.
 *
 * @param instanceAnnotation the annotation the features are extracted for
 * @param inputAS the annotation set handed to the feature extraction
 * @param featureInfo provides the attribute specifications to extract
 * @param pipe the pipe whose data alphabet backs the feature vector
 * @return a new instance whose data is the extracted feature vector; its target, name and
 *     source are all null
 */
static Instance extractIndependentFeaturesHelper(
    Annotation instanceAnnotation,
    AnnotationSet inputAS,
    FeatureInfo featureInfo,
    Pipe pipe) {
  // Instance constructor arguments are: data, target, name, source
  Instance instance =
      new Instance(new AugmentableFeatureVector(pipe.getDataAlphabet()), null, null, null);
  for (FeatureSpecAttribute attribute : featureInfo.getAttributes()) {
    FeatureExtraction.extractFeature(instance, attribute, inputAS, instanceAnnotation);
  }
  // TODO: we destructively replace the AugmentableFeatureVector by a FeatureVector here,
  // but it is not clear if this is beneficial - our assumption is that yes.
  AugmentableFeatureVector growable = (AugmentableFeatureVector) instance.getData();
  instance.setData(growable.toFeatureVector());
  return instance;
}
示例2: CRF
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates a CRF from an input pipe and an output pipe.
 * Both pipes are forwarded to the superclass constructor; the input and output
 * alphabets of this CRF are then taken from the data alphabet and the target
 * alphabet of the input pipe.
 *
 * @param inputPipe pipe whose data and target alphabets become the input and output alphabets
 * @param outputPipe pipe passed on to the superclass constructor; not otherwise used here
 */
public CRF (Pipe inputPipe, Pipe outputPipe)
{
super (inputPipe, outputPipe);
this.inputAlphabet = inputPipe.getDataAlphabet();
// NOTE(review): the target alphabet is read from inputPipe, not outputPipe -
// presumably the input pipe handles both data and target; confirm before changing.
this.outputAlphabet = inputPipe.getTargetAlphabet();
//inputAlphabet.stopGrowth();
}
示例3: testTokenAccuracy
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains a fully connected CRF on the first half of a fixed-seed 50/50 split and checks
 * the token accuracy reported for the second half against a recorded value.
 */
public void testTokenAccuracy() {
  Pipe pipe = makeSpacePredictionPipe();
  InstanceList allInstances = new InstanceList(pipe);
  allInstances.addThruPipe(new ArrayIterator(data));

  // Fixed seed keeps the split, and therefore the expected accuracy, reproducible
  double[] proportions = { .5, .5 };
  InstanceList[] splits = allInstances.split(new Random(777), proportions);
  InstanceList trainSet = splits[0];
  InstanceList testSet = splits[1];

  CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  crf.addFullyConnectedStatesForLabels();
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
  trainer.setUseSparseWeights(true);
  trainer.trainIncremental(trainSet);

  String[] descriptions = { "Train", "Test" };
  TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(splits, descriptions);
  evaluator.evaluateInstanceList(trainer, testSet, "Test");
  assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
示例4: PagedInstanceList
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates a PagedInstanceList that keeps up to "numPages" pages of "instancesPerPage"
 * instances each in memory and uses the directory "swapDir" for the pages on disk.
 * All in-memory page slots start out with the id -1. The swap directory is created if it
 * does not exist yet; if a SecurityException prevents that, an error is printed and the
 * JVM exits with status -1.
 *
 * @param pipe instance pipe
 * @param numPages number of pages to keep in memory
 * @param instancesPerPage number of Instances to store in each page
 * @param swapDir where the pages on disk live.
 */
public PagedInstanceList (Pipe pipe, int numPages, int instancesPerPage, File swapDir) {
  super (pipe, numPages * instancesPerPage);
  this.instancesPerPage = instancesPerPage;
  this.swapDir = swapDir;

  this.inMemoryPageIds = new int[numPages];
  this.inMemoryPages = new InstanceList[numPages];
  this.noopPipe = new Noop (pipe.getDataAlphabet (), pipe.getTargetAlphabet ());
  // Mark every in-memory page slot with the id -1
  java.util.Arrays.fill (this.inMemoryPageIds, -1);

  try {
    boolean alreadyThere = swapDir.exists ();
    if (!alreadyThere) {
      swapDir.mkdir ();
    }
  } catch (SecurityException e) {
    System.err.println ("No permission to make directory " + swapDir);
    System.exit (-1);
  }
}
示例5: testConcatenatePipes
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Checks that concatenating a StupidPipe with an initialized tagger pipe yields a pipe
 * whose data alphabet is the very same object as the tagger pipe's data alphabet.
 */
public void testConcatenatePipes ()
{
  Pipe stupid = new StupidPipe ();
  Pipe tagger = new SimpleTagger.SimpleTaggerSentence2FeatureVectorSequence ();

  // Push one instance through the tagger pipe to initialize its dictionary
  Instance seed = new Instance (data, null, null, null);
  tagger.instanceFrom (seed);
  int taggerDictSize = tagger.getDataAlphabet ().size ();
  assertEquals (3, taggerDictSize);

  Pipe combined = PipeUtils.concatenatePipes (stupid, tagger);
  Alphabet combinedDict = combined.getDataAlphabet ();
  assertEquals (3, combinedDict.size ());

  // Identity, not equality: the combined pipe must expose the tagger's own alphabet
  boolean sharesTaggerDict = (combinedDict == tagger.getDataAlphabet ());
  assertTrue (sharesTaggerDict);
}
示例6: testConcatenateBadPipes
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Checks that concatenating two pipes that each resolved their own, distinct data
 * alphabet is rejected with an IllegalArgumentException.
 */
public void testConcatenateBadPipes ()
{
  Pipe left = new SimpleTaggerSentence2TokenSequence ();
  Alphabet leftDict = left.getDataAlphabet ();   // force resolving data alphabet
  Pipe right = new SimpleTaggerSentence2TokenSequence ();
  Alphabet rightDict = right.getDataAlphabet (); // force resolving data alphabet
  assertTrue (leftDict != rightDict);

  boolean rejected = false;
  try {
    PipeUtils.concatenatePipes (left, right);
  } catch (IllegalArgumentException expected) {
    // Exception expected
    rejected = true;
  }
  assertTrue ("Test failed: concatenatePipes() allowed putting together incompatible alphabets.", rejected);
}
示例7: ACRF
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
* Create a ACRF for a 1-d sequence. Needs an array
* of Templates.
*/
public ACRF (Pipe inputPipe, Template[] tmpls)
throws IllegalArgumentException
{
this.inputPipe = inputPipe;
this.templates = tmpls;
this.inputAlphabet = inputPipe.getDataAlphabet();
this.defaultFeatureIndex = inputAlphabet.size ();
for (int tidx = 0; tidx < templates.length; tidx++) templates [tidx].index = tidx;
}
示例8: resolveAlphabets
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Walks the pipes in order, telling each one the data and target alphabets of the pipe
 * before it (null for the first pipe), and records the alphabets reported by the last
 * pipe as this object's data and target alphabets.
 */
private void resolveAlphabets ()
{
  Alphabet latestData = null;
  Alphabet latestTarget = null;
  for (Pipe stage : pipes) {
    // Each stage is notified of its predecessor's alphabet before its own is queried
    stage.preceedingPipeDataAlphabetNotification (latestData);
    latestData = stage.getDataAlphabet ();

    stage.preceedingPipeTargetAlphabetNotification (latestTarget);
    latestTarget = stage.getTargetAlphabet ();
  }
  dataAlphabet = latestData;
  targetAlphabet = latestTarget;
}
示例9: testPipesAreStupid
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Checks that asking for the data alphabet of a SerialPipes built from a StupidPipe
 * followed by an already-used token sequence pipe throws an IllegalStateException.
 */
public void testPipesAreStupid ()
{
  Pipe stupid = new StupidPipe ();
  Pipe tokenizer = new SimpleTaggerSentence2TokenSequence ();
  // Push one instance through the second pipe to initialize its dictionary
  Instance seed = new Instance (data, null, null, null);
  tokenizer.instanceFrom (seed);

  Pipe[] stages = { stupid, tokenizer };
  Pipe serial = new SerialPipes (stages);

  boolean threw = false;
  try {
    serial.getDataAlphabet ();
  } catch (IllegalStateException expected) {
    // This is the outcome the test is looking for
    threw = true;
  }
  assertTrue ("Test failed: Should have generated exception.", threw);
}
示例10: HMM
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates an HMM from an input pipe and an output pipe.
 * Both pipes are stored; the input and output alphabets are taken from the data
 * alphabet and the target alphabet of the input pipe.
 *
 * @param inputPipe pipe whose data and target alphabets become the input and output alphabets
 * @param outputPipe pipe stored as this HMM's output pipe; its alphabets are not read here
 */
public HMM(Pipe inputPipe, Pipe outputPipe) {
this.inputPipe = inputPipe;
this.outputPipe = outputPipe;
this.inputAlphabet = inputPipe.getDataAlphabet();
// NOTE(review): the target alphabet is read from inputPipe, not outputPipe -
// presumably the input pipe handles both data and target; confirm before changing.
this.outputAlphabet = inputPipe.getTargetAlphabet();
}
示例11: testAddOrderNStates
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains three CRFs whose states are built by addOrderNStates with increasingly rich
 * settings on the same training split, and checks that the likelihood increases from one
 * to the next and matches the recorded values.
 */
public void testAddOrderNStates() {
  Pipe pipe = makeSpacePredictionPipe();
  InstanceList allInstances = new InstanceList(pipe);
  allInstances.addThruPipe(new ArrayIterator(data));
  InstanceList[] splits =
      allInstances.split(new java.util.Random(678), new double[] { .5, .5 });
  InstanceList trainSet = splits[0];

  // Compare 3 CRFs trained with addOrderNStates, and make sure
  // that having more features leads to a higher likelihood
  CRF first = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  first.addOrderNStates(
      trainSet, new int[] { 1 }, new boolean[] { false }, "START", null, null, false);
  new CRFTrainerByLabelLikelihood(first).trainIncremental(trainSet);

  CRF second = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  second.addOrderNStates(
      trainSet, new int[] { 1, 2 }, new boolean[] { false, true }, "START", null, null, false);
  new CRFTrainerByLabelLikelihood(second).trainIncremental(trainSet);

  CRF third = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  third.addOrderNStates(
      trainSet, new int[] { 1, 2 }, new boolean[] { false, false }, "START", null, null, false);
  new CRFTrainerByLabelLikelihood(third).trainIncremental(trainSet);

  // Prevent cached values
  double likFirst = getLikelihood(first, trainSet);
  double likSecond = getLikelihood(second, trainSet);
  double likThird = getLikelihood(third, trainSet);
  System.out.println("CRF1 likelihood " + likFirst);

  String firstVsSecond = "Final zero-order likelihood <" + likFirst
      + "> greater than first-order <" + likSecond + ">";
  assertTrue(firstVsSecond, likFirst < likSecond);
  String secondVsThird = "Final defaults-only likelihood <" + likSecond
      + "> greater than full first-order <" + likThird + ">";
  assertTrue(secondVsThird, likSecond < likThird);

  assertEquals(-167.2234457483949, likFirst, 0.0001);
  assertEquals(-165.81326484466342, likSecond, 0.0001);
  assertEquals(-90.37680146432787, likThird, 0.0001);
}
示例12: testFrozenWeights
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains one CRF normally and a second one with every even-indexed weight set frozen,
 * then checks that the frozen weights are still zero after training and that the frozen
 * model reaches a lower objective value than the unrestricted one.
 */
public void testFrozenWeights() {
  Pipe pipe = makeSpacePredictionPipe();
  InstanceList instances = new InstanceList(pipe);
  instances.addThruPipe(new ArrayIterator(data));

  CRF fullCrf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  fullCrf.addFullyConnectedStatesForLabels();
  CRFTrainerByLabelLikelihood fullTrainer = new CRFTrainerByLabelLikelihood(fullCrf);
  fullTrainer.trainIncremental(instances);

  CRF frozenCrf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  frozenCrf.addFullyConnectedStatesForLabels();
  // Freeze some weights, before training
  for (int idx = 0; idx < frozenCrf.getWeights().length; idx += 2) {
    frozenCrf.freezeWeights(idx);
  }
  CRFTrainerByLabelLikelihood frozenTrainer = new CRFTrainerByLabelLikelihood(frozenCrf);
  frozenTrainer.trainIncremental(instances);

  // Every frozen index must still carry all-zero weights and a zero default weight
  SparseVector[] weights = frozenCrf.getWeights();
  double[] defaults = frozenCrf.getDefaultWeights();
  for (int idx = 0; idx < weights.length; idx += 2) {
    assertEquals(0.0, defaults[idx], 1e-10);
    SparseVector frozen = weights[idx];
    for (int loc = 0; loc < frozen.numLocations(); loc++) {
      assertEquals(0.0, frozen.valueAtLocation(loc), 1e-10);
    }
  }

  // Check that the frozen weights has worse likelihood
  Optimizable.ByGradientValue fullObjective = fullTrainer.getOptimizableCRF(instances);
  Optimizable.ByGradientValue frozenObjective = frozenTrainer.getOptimizableCRF(instances);
  double fullValue = fullObjective.getValue();
  double frozenValue = frozenObjective.getValue();
  String complaint = "Error: Freezing weights does not harm log-likelihood! Full "
      + fullValue + ", Frozen " + frozenValue;
  assertTrue(complaint, fullValue > frozenValue);
}
示例13: disabledtestAddOrderNStates
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Trains three MEMMs whose states are built by addOrderNStates with increasingly rich
 * settings on the same training split, and checks that the likelihood increases from one
 * to the next and matches the recorded values. The method name marks it as disabled.
 */
public void disabledtestAddOrderNStates ()
{
  Pipe pipe = makeSpacePredictionPipe ();
  InstanceList allInstances = new InstanceList (pipe);
  allInstances.addThruPipe (new ArrayIterator(data));
  InstanceList[] splits =
      allInstances.split (new java.util.Random (678), new double[]{.5, .5});
  InstanceList trainSet = splits [0];

  // Compare 3 CRFs trained with addOrderNStates, and make sure
  // that having more features leads to a higher likelihood
  MEMM first = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  first.addOrderNStates (
      trainSet, new int[] { 1 }, new boolean[] { false }, "START", null, null, false);
  first.setWeightsDimensionAsIn(trainSet);
  MEMMTrainer firstTrainer = new MEMMTrainer (first);
  firstTrainer.train(trainSet);

  MEMM second = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  second.addOrderNStates (
      trainSet, new int[] { 1, 2 }, new boolean[] { false, true }, "START", null, null, false);
  second.setWeightsDimensionAsIn(trainSet);
  MEMMTrainer secondTrainer = new MEMMTrainer (second);
  secondTrainer.train(trainSet);

  MEMM third = new MEMM(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
  third.addOrderNStates (
      trainSet, new int[] { 1, 2 }, new boolean[] { false, false }, "START", null, null, false);
  third.setWeightsDimensionAsIn(trainSet);
  MEMMTrainer thirdTrainer = new MEMMTrainer (third);
  thirdTrainer.train(trainSet);

  // Prevent cached values
  double likFirst = getLikelihood (firstTrainer, trainSet);
  double likSecond = getLikelihood (secondTrainer, trainSet);
  double likThird = getLikelihood (thirdTrainer, trainSet);
  System.out.println("CRF1 likelihood "+likFirst);

  String firstVsSecond =
      "Final zero-order likelihood <"+likFirst+"> greater than first-order <"+likSecond+">";
  assertTrue (firstVsSecond, likFirst < likSecond);
  String secondVsThird =
      "Final defaults-only likelihood <"+likSecond+"> greater than full first-order <"+likThird+">";
  assertTrue (secondVsThird, likSecond < likThird);

  assertEquals (-167.335971702, likFirst, 0.0001);
  assertEquals (-166.212235389, likSecond, 0.0001);
  assertEquals ( -90.386005741, likThird, 0.0001);
}
示例14: NaiveBayesTrainer
import cc.mallet.pipe.Pipe; //导入方法依赖的package包/类
/**
 * Creates a NaiveBayesTrainer bound to the given instance pipe.
 * The pipe is stored, and its data and target alphabets, as they are at
 * construction time, are recorded as this trainer's data and target alphabets.
 *
 * @param instancePipe pipe supplying the data and target alphabets; must not be null,
 *     since its alphabet getters are called here
 */
public NaiveBayesTrainer (Pipe instancePipe) {
this.instancePipe = instancePipe;
this.dataAlphabet = instancePipe.getDataAlphabet();
this.targetAlphabet = instancePipe.getTargetAlphabet();
}