本文整理汇总了Java中cc.mallet.types.InstanceList类的典型用法代码示例。如果您正苦于以下问题：Java InstanceList类的具体用法？Java InstanceList怎么用？Java InstanceList使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
InstanceList类属于cc.mallet.types包，在下文中一共展示了InstanceList类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: split
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Divides this node's instances between two new child nodes.
 * An instance goes to {@code child1} when its feature vector holds a
 * non-zero value at {@code featureIndex}; otherwise it goes to {@code child0}.
 * Each instance keeps the weight it had in this node's list.
 *
 * @param fs feature selection handed unchanged to both child {@code Node} constructors
 * @throws IllegalStateException if this node no longer holds an instance list
 */
public void split (FeatureSelection fs)
{
    if (ilist == null) {
        throw new IllegalStateException ("Frozen. Cannot split.");
    }
    InstanceList withoutFeature = new InstanceList (ilist.getPipe());
    InstanceList withFeature = new InstanceList (ilist.getPipe());
    for (int idx = 0; idx < ilist.size(); idx++) {
        Instance current = ilist.get(idx);
        FeatureVector features = (FeatureVector) current.getData ();
        // xxx Open question: is "exactly zero" the right test, and how should
        // negative values be treated? Whatever is decided here should also go
        // in InfoGain.calcInfoGains()
        boolean featureAbsent = features.value (featureIndex) == 0;
        InstanceList destination = featureAbsent ? withoutFeature : withFeature;
        destination.add (current, ilist.getInstanceWeight(idx));
    }
    logger.info("child0="+withoutFeature.size()+" child1="+withFeature.size());
    child0 = new Node (withoutFeature, this, fs);
    child1 = new Node (withFeature, this, fs);
}
示例2: collectConstraints
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Visits every instance of {@code ilist} in index order, unrolls it into an
 * {@code ACRF.UnrolledGraph} using this object's {@code templates}, and hands
 * the graph together with its assignment to {@code collectConstraintsForGraph}.
 *
 * @param ilist instances to collect constraints from
 */
public void collectConstraints (InstanceList ilist)
{
    int position = 0;
    while (position < ilist.size()) {
        logger.finest ("*** Collecting constraints for instance "+position);
        Instance current = ilist.get (position);
        ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph (current, templates, null, true);
        collectConstraintsForGraph (graph, graph.getAssignment ());
        position++;
    }
}
示例3: getOptimizableCRF
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Returns the batch label-likelihood optimizable for {@code trainingSet},
 * reusing the cached one when possible.
 * <p>
 * When the CRF's weight-structure stamp differs from the cached stamp, the
 * weight dimensions are re-established (unless {@code useNoWeights} is set),
 * the cached optimizable is dropped and the stamp is refreshed. A new
 * optimizable, a new threaded wrapper, and a cleared optimizer are produced
 * whenever no cached optimizable exists or it was built for a different
 * training-set object (reference comparison).
 *
 * @param trainingSet instances the optimizable is built over
 * @return the cached or newly created optimizable
 */
public CRFOptimizableByBatchLabelLikelihood getOptimizableCRF (InstanceList trainingSet) {
    boolean structureChanged = cachedWeightsStructureStamp != crf.weightsStructureChangeStamp;
    if (structureChanged) {
        if (!useNoWeights && useSparseWeights) {
            crf.setWeightsDimensionAsIn (trainingSet, useSomeUnsupportedTrick);
        } else if (!useNoWeights) {
            crf.setWeightsDimensionDensely ();
        }
        optimizable = null;
        // Read the stamp only after the dimension calls above have run.
        cachedWeightsStructureStamp = crf.weightsStructureChangeStamp;
    }

    boolean reusable = optimizable != null && optimizable.trainingSet == trainingSet;
    if (!reusable) {
        optimizable = new CRFOptimizableByBatchLabelLikelihood(crf, trainingSet, numThreads);
        optimizable.setGaussianPriorVariance(gaussianPriorVariance);
        int factorCount = crf.getParameters().getNumFactors();
        CRFCacheStaleIndicator staleIndicator = new CRFCacheStaleIndicator(crf);
        threadedOptimizable = new ThreadedOptimizable(optimizable, trainingSet, factorCount, staleIndicator);
        optimizer = null;
    }
    return optimizable;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:CRFTrainerByThreadedLabelLikelihood.java
示例4: addOrderNStates
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Adds CRF states for every order from 0 up to and including {@code n},
 * then resets the initial weights so that only the returned start state is
 * allowed as an initial state, and finally sizes the weights densely.
 *
 * @param n
 *            highest order to create; orders 0..n are all requested. When
 *            {@code n} is negative no orders are collected and a
 *            {@code null} order array is passed on to the CRF.
 * @param trainingInstances
 *            instances forwarded to the CRF's {@code addOrderNStates}
 */
public void addOrderNStates(int n, InstanceList trainingInstances) {
    List<Integer> orderList = new ArrayList<Integer>();
    for (int order = 0; order <= n; order++) {
        orderList.add(order);
    }
    int[] ordersArray = orderList.isEmpty()
            ? null
            : ArrayUtils.toPrimitive(orderList.toArray(new Integer[orderList.size()]));

    Pattern forbiddenPat = Pattern.compile("\\s");
    Pattern allowedPat = Pattern.compile(".*");
    String startName = this.crf.addOrderNStates(
            trainingInstances, ordersArray, null, "O", forbiddenPat, allowedPat, true);

    // Rule out every state as an initial state first ...
    int stateIndex = 0;
    while (stateIndex < this.crf.numStates()) {
        this.crf.getState(stateIndex).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
        stateIndex++;
    }
    // ... then re-enable only the start state reported by the CRF.
    this.crf.getState(startName).setInitialWeight(0.0);
    this.crf.setWeightsDimensionDensely();
}
示例5: gatherConstraints
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Populates {@code constraints} by running forward-backward restricted to the
 * <i>label sequence supplied with each instance</i>, so only paths that agree
 * with that labelling contribute.
 * <p>
 * The constraints are zeroed first and checked for NaN/infinite values at the
 * end. Instances whose weight is exactly 1.0 use a plain incrementor; all
 * others use a weighted incrementor carrying the instance weight.
 *
 * @param ilist labelled instances to gather constraints from
 */
protected void gatherConstraints(InstanceList ilist) {
    logger.info("Gathering constraints...");
    assert (constraints.structureMatches(crf.parameters));
    constraints.zero();

    for (Instance example : ilist) {
        FeatureVectorSequence input = (FeatureVectorSequence) example.getData();
        FeatureSequence output = (FeatureSequence) example.getTarget();
        double weight = ilist.getInstanceWeight(example);

        Transducer.Incrementor incrementor;
        if (weight != 1.0) {
            incrementor = constraints.new WeightedIncrementor(weight);
        } else {
            incrementor = constraints.new Incrementor();
        }
        // The lattice object itself is discarded; it is created for the
        // effect its construction has through the incrementor.
        new SumLatticeDefault (this.crf, input, output, incrementor);
    }

    constraints.assertNotNaNOrInfinite();
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:CRFOptimizableByBatchLabelLikelihood.java
示例6: serializePage
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/** Writes one page of instances without any metadata. Only what is needed
 * to rebuild the page is written; redundant data such as pipes and
 * dictionaries is left out.
 * <p>
 * Layout: the instance count, then for each instance its data, target,
 * name, source and weight. The weight defaults to 1.0 when there is no
 * weight table or the table has no entry for the instance.
 * @param out Object output stream to write to
 * @param page instances to serialize
 * @throws IOException if writing to {@code out} fails
 */
private void serializePage (ObjectOutputStream out, InstanceList page)
        throws IOException {
    out.writeInt (page.size ());
    for (Instance inst : page) {
        serializeObject (out, inst.getData ());
        serializeObject (out, inst.getTarget ());
        out.writeObject (inst.getName ());
        out.writeObject (inst.getSource ());

        double weightToWrite = 1.0;
        if (this.instWeights != null) {
            Double stored = this.instWeights.get (inst);
            if (stored != null) {
                weightToWrite = stored;
            }
        }
        out.writeDouble (weightToWrite);
    }
}
示例7: trainWithFeatureInduction
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Convenience overload: forwards every argument unchanged to the longer
 * {@code trainWithFeatureInduction} variant and supplies the string
 * {@code "exp"} as that variant's additional trailing argument.
 *
 * @return the value returned by the delegated call
 */
public boolean trainWithFeatureInduction (InstanceList trainingData,
        InstanceList validationData, InstanceList testingData,
        TransducerEvaluator eval, int numIterations,
        int numIterationsBetweenFeatureInductions,
        int numFeatureInductions,
        int numFeaturesPerFeatureInduction,
        double trueLabelProbThreshold,
        boolean clusteredFeatureInduction,
        double[] trainingProportions)
{
    final String defaultTrailingArgument = "exp";
    boolean outcome = trainWithFeatureInduction (
            trainingData, validationData, testingData,
            eval, numIterations,
            numIterationsBetweenFeatureInductions,
            numFeatureInductions,
            numFeaturesPerFeatureInduction,
            trueLabelProbThreshold,
            clusteredFeatureInduction,
            trainingProportions,
            defaultTrailingArgument);
    return outcome;
}
示例8: Clustering
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/** Creates a clustering over the given instances.
 * <p>
 * The arguments are validated before anything is stored; the {@code labels}
 * array is kept by reference, not copied.
 *
 * @param instances Instances that are clustered
 * @param numLabels Number of clusters; must be at least 1
 * @param labels Cluster index for each instance (many-to-one); every
 *   entry must lie in [0,numLabels) and the array length must equal
 *   the number of instances.
 * @throws IllegalArgumentException if any of the above conditions is violated
 */
public Clustering (InstanceList instances, int numLabels, int[] labels) {
    if (instances.size() != labels.length) {
        throw new IllegalArgumentException("Instance list length does not match cluster labeling");
    }
    if (numLabels < 1) {
        throw new IllegalArgumentException("Number of labels must be strictly positive.");
    }
    for (int label : labels) {
        boolean inRange = 0 <= label && label < numLabels;
        if (!inRange) {
            throw new IllegalArgumentException("Label mapping must have range [0,numLabels).");
        }
    }

    this.instances = instances;
    this.numLabels = numLabels;
    this.labels = labels;
}
示例9: labelConnectionsIn
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Builds a square boolean matrix of label transitions seen in the training
 * targets: entry {@code [a][b]} is true when the label with index {@code b}
 * directly follows the label with index {@code a} in some target sequence.
 * When {@code start} is non-null, the row of the start label is set to true
 * for every column up to the output alphabet's current size.
 *
 * NOTE(review): the matrix is sized from the alphabet size read on entry,
 * while indices come from {@code lookupIndex}; if a lookup can enlarge the
 * alphabet, an index could fall outside the matrix — confirm against Alphabet.
 *
 * @param trainingSet instances whose targets are {@code FeatureSequence}s
 * @param start name of the start label, or {@code null} for none
 * @return the transition matrix described above
 */
private boolean[][] labelConnectionsIn (InstanceList trainingSet, String start)
{
    int numLabels = outputAlphabet.size();
    boolean[][] connections = new boolean[numLabels][numLabels];

    for (int seq = 0; seq < trainingSet.size(); seq++) {
        Instance example = trainingSet.get(seq);
        FeatureSequence labels = (FeatureSequence) example.getTarget();
        // Record each adjacent (previous, current) label pair.
        for (int pos = 1; pos < labels.size(); pos++) {
            int fromIndex = outputAlphabet.lookupIndex (labels.get(pos - 1));
            int toIndex = outputAlphabet.lookupIndex (labels.get(pos));
            assert (fromIndex >= 0 && toIndex >= 0);
            connections[fromIndex][toIndex] = true;
        }
    }

    if (start == null) {
        return connections;
    }

    // Handle start state: it may lead to every label.
    int startIndex = outputAlphabet.lookupIndex (start);
    for (int target = 0; target < outputAlphabet.size(); target++) {
        connections[startIndex][target] = true;
    }
    return connections;
}
示例10: testTokenAccuracy
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Trains a fully connected CRF with sparse weights on one half of the data
 * (split 50/50 with a fixed random seed) and checks that the token accuracy
 * reported for the other half is 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();
    InstanceList allInstances = new InstanceList(pipe);
    allInstances.addThruPipe(new ArrayIterator(data));

    // Fixed seed keeps the split, and therefore the expected accuracy, stable.
    InstanceList[] halves = allInstances.split(new Random(777), new double[] { .5, .5 });
    InstanceList trainHalf = halves[0];
    InstanceList testHalf = halves[1];

    CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(trainHalf);

    String[] descriptions = { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
    evaluator.evaluateInstanceList(trainer, testHalf, "Test");
    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
示例11: getOptimizableCRF
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Returns the optimizable CRF (a collection of objective functions) for the
 * given training set.
 * <p>
 * The cached instance is returned when it exists and was built for this very
 * training-set object; otherwise a new one is created and the optimizer is
 * reset to null.
 *
 * @param trainingSet instances the optimizable is built over
 * @return the cached or newly created optimizable CRF
 */
public OptimizableCRF getOptimizableCRF (InstanceList trainingSet) {
    // gsc: user should call setWeightsDimensionsAsIn before the optimizable and
    // trainer objects are created. For that reason this method deliberately
    // does not compare weight-structure stamps or resize the CRF's weights
    // (sparse or dense) itself.
    boolean cacheUsable = ocrf != null && ocrf.trainingSet == trainingSet;
    if (cacheUsable) {
        return ocrf;
    }
    ocrf = new OptimizableCRF (crf, trainingSet);
    opt = null;
    return ocrf;
}
示例12: computeLikelihood
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Accumulates, over every instance in {@code trainingSample}, the total
 * weight of a lattice built with the instance's target sequence minus the
 * total weight of a lattice built with no output sequence.
 * Afterwards {@code constraints} and {@code expectations} are zeroed.
 *
 * @param trainingSample instances with {@code FeatureVectorSequence} data
 *        and {@code Sequence} targets
 * @return the accumulated difference
 */
private double computeLikelihood(InstanceList trainingSample) {
    double total = 0.0;
    for (int idx = 0; idx < trainingSample.size(); idx++) {
        Instance example = trainingSample.get(idx);
        FeatureVectorSequence input = (FeatureVectorSequence) example.getData();
        Sequence goldLabels = (Sequence) example.getTarget();

        double labeledWeight = new SumLatticeDefault(crf, input, goldLabels, null).getTotalWeight();
        total += labeledWeight;
        double unlabeledWeight = new SumLatticeDefault(crf, input, null, null).getTotalWeight();
        total -= unlabeledWeight;
    }
    constraints.zero();
    expectations.zero();
    return total;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:CRFTrainerByStochasticGradient.java
示例13: CRFOptimizableByBatchLabelLikelihood
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Creates the optimizable for {@code crf} over {@code ilist}, with per-batch
 * caches for the value, the gradient and the expectations, and then gathers
 * the constraints from the training instances.
 *
 * @param crf the CRF whose parameters define the structure of all factors
 * @param ilist training instances; stored as the training set
 * @param numBatches number of batches; one cache slot of each kind is
 *        allocated per batch
 */
public CRFOptimizableByBatchLabelLikelihood(CRF crf, InstanceList ilist, int numBatches) {
    this.crf = crf;
    this.trainingSet = ilist;
    this.numBatches = numBatches;

    // Per-batch caches.
    this.cachedValue = new double[this.numBatches];
    this.cachedGradient = new ArrayList<double[]>(this.numBatches);
    this.expectations = new ArrayList<CRF.Factors>(this.numBatches);

    int gradientLength = crf.parameters.getNumFactors();
    int batch = 0;
    while (batch < this.numBatches) {
        this.cachedGradient.add(new double[gradientLength]);
        this.expectations.add(new CRF.Factors(crf.parameters));
        batch++;
    }

    this.constraints = new CRF.Factors(crf.parameters);
    gatherConstraints(ilist);
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:CRFOptimizableByBatchLabelLikelihood.java
示例14: CRFOptimizableByGE
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Creates the optimizable from a CRF, a list of GE constraints and the data.
 * <p>
 * Each constraint receives the state-label map and pre-processes the data;
 * the bit sets returned by pre-processing are OR-ed together into
 * {@code instancesWithConstraints}. A fixed-size thread pool is created only
 * when more than one thread is requested.
 *
 * @param crf the CRF
 * @param constraints constraints to apply; each one is configured and pre-processed here
 * @param data instances the constraints are pre-processed against
 * @param map state-label map given to every constraint
 * @param numThreads number of threads; a pool is created only if greater than 1
 * @param weight stored as this object's {@code weight}
 */
public CRFOptimizableByGE(CRF crf, ArrayList<GEConstraint> constraints, InstanceList data, StateLabelMap map, int numThreads, double weight) {
    this.crf = crf;
    this.constraints = constraints;
    this.cache = Integer.MAX_VALUE;
    this.cachedValue = Double.NaN;
    this.cachedGradient = new CRF.Factors(crf);
    this.data = data;
    this.numThreads = numThreads;
    this.weight = weight;

    this.instancesWithConstraints = new BitSet(data.size());
    for (GEConstraint geConstraint : constraints) {
        geConstraint.setStateLabelMap(map);
        BitSet touched = geConstraint.preProcess(data);
        this.instancesWithConstraints.or(touched);
    }

    this.gpv = DEFAULT_GPV;

    boolean multiThreaded = numThreads > 1;
    if (multiThreaded) {
        this.executor = (ThreadPoolExecutor)Executors.newFixedThreadPool(numThreads);
    }

    createReverseTransitionMatrices(crf);
}
示例15: testSpaceViewer
import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Trains a fully connected CRF on the first {@code TestCRF.data} string,
 * runs an extractor built from that CRF over the second string, and writes
 * the resulting extraction into {@code outputDir} via {@code DocumentViewer}.
 *
 * @throws IOException if the output directory cannot be created or writing
 *         the extraction fails
 */
public void testSpaceViewer () throws IOException
{
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // The testing list is not read again below; it is still built so that the
    // second string passes through the shared pipe exactly as before.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = TestLatticeViewer.hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // mkdirs() also creates missing parent directories and returns false when
    // the directory already exists, hence the isDirectory() re-check. Failing
    // here gives a clear error instead of a confusing one from the writer.
    if (!outputDir.mkdirs () && !outputDir.isDirectory ()) {
        throw new IOException ("Could not create output directory " + outputDir);
    }
    DocumentViewer.writeExtraction (outputDir, extraction);
}