本文整理汇总了Java中cc.mallet.types.InstanceList.get方法的典型用法代码示例。如果您正苦于以下问题：Java InstanceList.get方法的具体用法？Java InstanceList.get怎么用？Java InstanceList.get使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.InstanceList的用法示例。
在下文中一共展示了InstanceList.get方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: predict
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Applies this model's global feature selection and every registered feature
 * inducer to the given instances, then computes the best output sequence for
 * each instance.
 *
 * <p>It is kept here as a reminder to do something about induceFeaturesFor().
 *
 * @param testing the instances to label; their feature selection is replaced
 *        by this model's global feature selection before decoding
 * @return one predicted sequence per instance, in the order of {@code testing}
 * @deprecated This method is deprecated.
 */
@Deprecated
public Sequence[] predict (InstanceList testing) {
    testing.setFeatureSelection(this.globalFeatureSelection);
    for (int k = 0; k < featureInducers.size(); k++) {
        FeatureInducer inducer = (FeatureInducer) featureInducers.get(k);
        inducer.induceFeaturesFor(testing, false, false);
    }

    Sequence[] predictions = new Sequence[testing.size()];
    for (int idx = 0; idx < testing.size(); idx++) {
        Instance current = testing.get(idx);
        Sequence tokens = (Sequence) current.getData();
        Sequence gold = (Sequence) current.getTarget();
        // Input and reference output are expected to be aligned.
        assert (tokens.size() == gold.size());
        MaxLatticeDefault lattice = new MaxLatticeDefault(this, tokens);
        Sequence guess = lattice.bestOutputSequence();
        assert (guess.size() == gold.size());
        predictions[idx] = guess;
    }
    return predictions;
}
示例2: computeLikelihood
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Accumulates, over all instances of the sample, the total weight of the
 * lattice built with the instance's label sequence minus the total weight of
 * the lattice built without a label sequence.
 *
 * <p>Before returning, {@code constraints} and {@code expectations} are zeroed.
 *
 * @param trainingSample instances whose data is a FeatureVectorSequence and
 *        whose target is a Sequence
 * @return the accumulated difference of lattice weights
 */
private double computeLikelihood(InstanceList trainingSample) {
    double total = 0.0;
    for (int idx = 0; idx < trainingSample.size(); idx++) {
        Instance current = trainingSample.get(idx);
        FeatureVectorSequence features = (FeatureVectorSequence) current.getData();
        Sequence labels = (Sequence) current.getTarget();

        // Weight of the lattice restricted to the observed labels ...
        double labeledWeight = new SumLatticeDefault(crf, features, labels, null).getTotalWeight();
        total += labeledWeight;
        // ... minus the weight of the unrestricted lattice.
        double freeWeight = new SumLatticeDefault(crf, features, null, null).getTotalWeight();
        total -= freeWeight;
    }
    constraints.zero();
    expectations.zero();
    return total;
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:CRFTrainerByStochasticGradient.java
示例3: evaluateInstanceList
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Measures per-instance accuracy of the trainer's transducer: an instance is
 * counted as correct when {@code sequencesMatch} accepts the true and the
 * predicted output sequences.
 *
 * <p>The accuracy is stored in {@code accuracy} under {@code description} and
 * also written to the log.
 *
 * @param tt trainer whose current transducer is evaluated
 * @param data instances to evaluate
 * @param description key for the stored accuracy and prefix of the log line
 */
public void evaluateInstanceList (TransducerTrainer tt, InstanceList data, String description)
{
    int numCorrect = 0;
    for (int idx = 0; idx < data.size(); idx++) {
        Instance current = data.get(idx);
        Sequence tokens = (Sequence) current.getData();
        Sequence gold = (Sequence) current.getTarget();
        assert (tokens.size() == gold.size());
        Sequence guess = tt.getTransducer().transduce(tokens);
        assert (guess.size() == gold.size());
        if (sequencesMatch(gold, guess)) {
            numCorrect++;
        }
    }
    // Floating-point division: an empty list yields NaN rather than an exception.
    double acc = ((double) numCorrect) / data.size();
    accuracy.put(description, acc);
    String message = description
        + " Num instances = " + data.size()
        + " Num correct = " + numCorrect
        + " Per-instance accuracy = " + acc;
    logger.info(message);
}
示例4: printInstanceLists
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Dumps to standard output, for every state of {@code memm}, the state's name
 * followed by the target and data of each instance in its training set, or
 * "No data" when the state has no training set.
 */
public void printInstanceLists ()
{
    final int stateCount = memm.numStates();
    for (int s = 0; s < stateCount; s++) {
        State source = (State) memm.getState(s);
        InstanceList examples = source.trainingSet;
        System.out.println("State " + s + " : " + source.getName());
        if (examples != null) {
            for (int n = 0; n < examples.size(); n++) {
                Instance example = examples.get(n);
                System.out.println("From : " + source.getName() + " To : " + example.getTarget());
                System.out.println("Instance " + n);
                System.out.println(example.getTarget());
                System.out.println(example.getData());
            }
        } else {
            System.out.println("No data");
        }
    }
}
示例5: collectConstraintsForInstance
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Unrolls one instance into a graph and adds the contribution of each of its
 * unrolled variable sets to the constraint accumulators.
 *
 * @param ilist list holding the instance
 * @param inum index of the instance within {@code ilist}
 */
private void collectConstraintsForInstance (InstanceList ilist, int inum)
{
    Instance example = ilist.get(inum);
    ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph(example, templates, null, false);
    Iterator cliques = graph.unrolledVarSetIterator();
    while (cliques.hasNext()) {
        ACRF.UnrolledVarSet varSet = (ACRF.UnrolledVarSet) cliques.next();
        int templateIdx = varSet.getTemplate().index;
        // Variable sets whose template index is -1 are skipped.
        if (templateIdx != -1) {
            int assignment = varSet.lookupAssignmentNumber();
            constraints[templateIdx][assignment].plusEqualsSparse(varSet.getFv());
            // Only bump the default constraint where the assignment has a location.
            if (defaultConstraints[templateIdx].location(assignment) != -1) {
                defaultConstraints[templateIdx].incrementValue(assignment, 1.0);
            }
        }
    }
}
示例6: findOutMode
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Determines the "mode" of the learning problem together with a detail value.
 *
 * <p>The mode is one of:
 * <ul>
 *   <li>{@code "classind"}: predict the index of a class</li>
 *   <li>{@code "classcosts"}: targets are vectors of class costs</li>
 *   <li>{@code "regr"}: regression</li>
 * </ul>
 * The detail value is the number of classes for the two classification modes
 * and 0 as a dummy value for {@code "regr"}. When there is a target alphabet
 * but no instances (application time rather than training), the result is
 * {@code "classind"} with -1, since the information is not needed then; should
 * it ever be needed it would have to be stored in the pipe.
 *
 * @param crm corpus representation providing the instances and the pipe
 * @return entry of (mode, number of classes)
 */
protected AbstractMap.SimpleEntry<String,Integer> findOutMode(CorpusRepresentationMalletTarget crm) {
    InstanceList instances = crm.getRepresentationMallet();
    Alphabet targetAlphabet = crm.getPipe().getTargetAlphabet();

    // No target alphabet: regression, with a dummy class count.
    if (targetAlphabet == null) {
        return new AbstractMap.SimpleEntry<String, Integer>("regr", 0);
    }

    // No first instance to inspect: dummy values for now.
    if (instances == null || instances.isEmpty()) {
        return new AbstractMap.SimpleEntry<String, Integer>("classind", -1);
    }

    Object firstTarget = instances.get(0).getTarget();
    if (firstTarget instanceof NominalTargetWithCosts) {
        int costCount = ((NominalTargetWithCosts) firstTarget).getCosts().length;
        return new AbstractMap.SimpleEntry<String, Integer>("classcosts", costCount);
    }
    return new AbstractMap.SimpleEntry<String, Integer>("classind", targetAlphabet.size());
}
示例7: getFromMallet
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Create libsvm representation from Mallet.
 *
 * <p>Every Mallet instance becomes one row of the returned problem: the target
 * goes into {@code y} (a Label is converted to its index, a Double is used
 * directly) and each stored location of the instance's vector becomes one
 * {@code svm_node} in {@code x}.
 *
 * <p>Feature entries are read through the vector's location accessors rather
 * than its raw index/value arrays, so vectors that carry no explicit value
 * array (binary) or no explicit index array (dense) are converted instead of
 * failing with a NullPointerException.
 *
 * @param crm the corpus representation whose Mallet instance list is converted
 * @return the libsvm problem with {@code l}, {@code y} and {@code x} filled in
 * @throws GateRuntimeException if an instance target is neither a Label nor a Double
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
    InstanceList instances = crm.getRepresentationMallet();
    svm_problem prob = new svm_problem();
    int numTrainingInstances = instances.size();
    prob.l = numTrainingInstances;
    prob.y = new double[prob.l];
    prob.x = new svm_node[prob.l][];

    for (int i = 0; i < numTrainingInstances; i++) {
        Instance instance = instances.get(i);

        // Labels
        // convert the target: if we get a label, convert to index,
        // if we get a double, use it directly
        Object tobj = instance.getTarget();
        if (tobj instanceof Label) {
            prob.y[i] = ((Label) tobj).getIndex();
        } else if (tobj instanceof Double) {
            prob.y[i] = (Double) tobj;
        } else {
            throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + tobj);
        }

        // Features
        SparseVector data = (SparseVector) instance.getData();
        int numLocations = data.numLocations();
        prob.x[i] = new svm_node[numLocations];
        for (int j = 0; j < numLocations; j++) {
            svm_node node = new svm_node();
            node.index = data.indexAtLocation(j) + 1; // NOTE: LibSVM location indices have to start with 1
            node.value = data.valueAtLocation(j);
            prob.x[i][j] = node;
        }
    }
    return prob;
}
示例8: collectConstraints
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Unrolls every instance of the list into a graph and collects constraints
 * from the graph together with its assignment.
 *
 * @param ilist the instances to collect constraints from
 */
public void collectConstraints (InstanceList ilist)
{
    int inum = 0;
    while (inum < ilist.size()) {
        logger.finest("*** Collecting constraints for instance " + inum);
        ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph(ilist.get(inum), templates, null, true);
        Assignment observed = graph.getAssignment();
        collectConstraintsForGraph(graph, observed);
        inum++;
    }
}
示例9: mergeInstancesWithSameLabel
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Merges the clusters of every pair of instances that currently carry
 * different cluster labels but have equal labelings.
 *
 * @param clustering the clustering to process
 * @return the clustering returned by the last merge, or the argument itself
 *         when no merge took place
 */
public static Clustering mergeInstancesWithSameLabel (Clustering clustering) {
    InstanceList members = clustering.getInstances();
    for (int first = 0; first < members.size(); first++) {
        Instance left = members.get(first);
        // NOTE(review): this label is read once per outer iteration and is not
        // refreshed after a merge; confirm mergeClusters keeps it valid.
        int leftCluster = clustering.getLabel(first);
        for (int second = first + 1; second < members.size(); second++) {
            Instance right = members.get(second);
            int rightCluster = clustering.getLabel(second);
            if (leftCluster != rightCluster) {
                if (left.getLabeling().equals(right.getLabeling())) {
                    clustering = ClusterUtils.mergeClusters(clustering, leftCluster, rightCluster);
                }
            }
        }
    }
    return clustering;
}
示例10: evaluateInstanceList
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Logs, for every entry of the target alphabet, the fraction of output
 * positions at which the transducer predicted it next to the fraction of
 * positions at which it is the true output.
 *
 * @param transducer trainer whose current transducer produces the predictions
 * @param instances instances to evaluate
 * @param description prefix of each logged line
 */
@Override
public void evaluateInstanceList(TransducerTrainer transducer,
        InstanceList instances, String description) {
    double[] predicted = new double[instances.getTargetAlphabet().size()];
    double[] observed = new double[instances.getTargetAlphabet().size()];
    int positions = 0;

    for (int n = 0; n < instances.size(); n++) {
        Instance current = instances.get(n);
        Sequence gold = (Sequence) current.getTarget();
        Sequence tokens = (Sequence) current.getData();
        Sequence guess = (Sequence) transducer.getTransducer().transduce(tokens);
        // Positions are driven by the predicted sequence's length.
        for (int pos = 0; pos < guess.size(); pos++) {
            positions++;
            int guessIdx = instances.getTargetAlphabet().lookupIndex(guess.get(pos));
            predicted[guessIdx]++;
            int goldIdx = instances.getTargetAlphabet().lookupIndex(gold.get(pos));
            observed[goldIdx]++;
        }
    }

    NumberFormat fmt = NumberFormat.getInstance();
    fmt.setMaximumFractionDigits(4);
    for (int label = 0; label < predicted.length; label++) {
        double predictedShare = predicted[label] / positions;
        double observedShare = observed[label] / positions;
        String line = description + " " + instances.getTargetAlphabet().lookupObject(label)
            + " predicted: " + fmt.format(predictedShare)
            + " - true: " + fmt.format(observedShare);
        logger.info(line);
    }
}
示例11: getFeatureLabelCounts
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a feature-by-label table of counts weighted by each instance's
 * labeling, so distributions over labels are handled.
 *
 * @param list instances whose data is a FeatureVector
 * @param useValues when true each feature contributes its stored value,
 *        otherwise it contributes 1.0
 * @return table indexed as {@code [featureIndex][labelIndex]}
 */
public static double[][] getFeatureLabelCounts(InstanceList list, boolean useValues) {
    final int featureCount = list.getDataAlphabet().size();
    final int labelCount = list.getTargetAlphabet().size();
    double[][] counts = new double[featureCount][labelCount];

    for (int n = 0; n < list.size(); n++) {
        Instance current = list.get(n);
        FeatureVector features = (FeatureVector) current.getData();
        for (int label = 0; label < labelCount; label++) {
            // Weight given to this label by the instance's labeling.
            double labelWeight = current.getLabeling().value(label);
            for (int loc = 0; loc < features.numLocations(); loc++) {
                int featureIdx = features.indexAtLocation(loc);
                double featureWeight = useValues ? features.valueAtLocation(loc) : 1.0;
                counts[featureIdx][label] += labelWeight * featureWeight;
            }
        }
    }
    return counts;
}
示例12: testXis
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a fully connected CRF for a few iterations and then checks on the
 * first instance that, at every position but the last and for every state,
 * the xi probabilities summed over the state's transitions equal the state's
 * gamma probability (tolerance 1e-5).
 */
public void testXis() {
    Pipe pipe = makeSpacePredictionPipe();
    InstanceList examples = new InstanceList(pipe);
    examples.addThruPipe(new ArrayIterator(data));

    CRF model = new CRF(pipe, null);
    model.addFullyConnectedStatesForLabels();
    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.train(examples, 10); // Let's get some parameters

    Instance first = examples.get(0);
    Sequence tokens = (Sequence) first.getData();
    Sequence gold = (Sequence) first.getTarget();
    SumLatticeDefault lattice = new SumLatticeDefault(model, tokens, gold, null, true);

    for (int pos = 0; pos < lattice.length() - 1; pos++) {
        for (int s = 0; s < model.numStates(); s++) {
            Transducer.State source = model.getState(s);
            Transducer.TransitionIterator transitions = source.transitionIterator(tokens, pos);
            double expected = lattice.getGammaProbability(pos, source);
            double summed = 0;
            for (; transitions.hasNext(); ) {
                Transducer.State target = transitions.nextState();
                summed += lattice.getXiProbability(pos, source, target);
            }
            assertEquals(expected, summed, 1e-5);
        }
    }
}
示例13: createExtractionFrom
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds an Extraction from two parallel arrays of strings by sending both
 * through the same pipe and pairing the i-th predicted instance with the i-th
 * true instance.
 *
 * @param predStrings strings holding the predicted annotations
 * @param trueStrings strings holding the reference annotations; read at every
 *        index that exists in {@code predStrings}
 * @return an extraction with one document named {@code "TEST" + i} per predicted string
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
    Pipe[] stages = new Pipe[] {
        new SGML2TokenSequence(new CharSequenceLexer(CharSequenceLexer.LEX_NONWHITESPACE_CLASSES), "O"),
        new Target2LabelSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipe = new SerialPipes(stages);

    InstanceList predicted = new InstanceList(pipe);
    predicted.addThruPipe(new ArrayIterator(predStrings));
    InstanceList reference = new InstanceList(pipe);
    reference.addThruPipe(new ArrayIterator(trueStrings));

    LabelAlphabet labels = (LabelAlphabet) pipe.getTargetAlphabet();
    Extraction result = new Extraction(null, labels);
    for (int doc = 0; doc < predicted.size(); doc++) {
        Instance predictedDoc = predicted.get(doc);
        Instance referenceDoc = reference.get(doc);
        Tokenization tokens = (Tokenization) predictedDoc.getData();
        Sequence predictedTags = (Sequence) predictedDoc.getTarget();
        Sequence referenceTags = (Sequence) referenceDoc.getTarget();
        result.addDocumentExtraction(
            new DocumentExtraction("TEST" + doc, labels, tokens, predictedTags, referenceTags, "O"));
    }
    return result;
}
示例14: train
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a NaiveBayes classifier iteratively: starting from a classifier
 * trained on the given set, each round labels the instances that have no
 * labeling with the current classifier's output, retrains on the resulting
 * set, and stops once the relative change in data log-likelihood between two
 * rounds drops below 0.01%.
 *
 * @param trainingSet instances, some of which may have no labeling
 * @return the classifier produced by the last round
 */
public NaiveBayes train (InstanceList trainingSet)
{
    // Get a classifier trained on the labeled examples only
    NaiveBayes model = (NaiveBayes) nbTrainer.newClassifierTrainer().train(trainingSet);
    double previousLogLikelihood = 0;
    boolean converged = false;
    // Counts completed rounds; the convergence test below does not consult it.
    int iteration = 0;
    do {
        // Make a new trainingSet that has some labels set
        InstanceList relabeled = new InstanceList(trainingSet.getPipe());
        for (int n = 0; n < trainingSet.size(); n++) {
            Instance original = trainingSet.get(n);
            if (original.getLabeling() == null) {
                // Unlabeled: attach the current model's labeling to a copy.
                Instance copy = original.shallowCopy();
                copy.unLock();
                copy.setLabeling(model.classify(original).getLabeling());
                copy.lock();
                relabeled.add(copy, unlabeledDataWeight);
            } else {
                relabeled.add(original, 1.0);
            }
        }
        model = (NaiveBayes) nbTrainer.newClassifierTrainer().train(relabeled);
        double logLikelihood = model.dataLogLikelihood(relabeled);
        System.err.println("Loglikelihood = " + logLikelihood);
        // Wait for a change in log-likelihood of less than 0.01%
        double relativeChange = Math.abs((logLikelihood - previousLogLikelihood) / logLikelihood);
        converged = relativeChange < 0.0001;
        previousLogLikelihood = logLikelihood;
        iteration++;
    } while (!converged);
    return model;
}
示例15: main
import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Loads every file found under the given directory through a pipe made of
 * Input2CharSequence followed by CharSequenceRemoveHTML and prints the
 * resulting data of each instance to standard error.
 *
 * <p>If no directory is supplied, an explanatory message is printed and the
 * JVM exits with status 1 instead of failing with an
 * ArrayIndexOutOfBoundsException.
 *
 * @param args {@code args[0]} is the directory to start from
 */
public static void main(String[] args) {
    if (args == null || args.length < 1) {
        System.err.println("Expected one argument: the directory containing the HTML files to process");
        System.exit(1);
        return;
    }
    String htmldir = args[0];
    Pipe pipe = new SerialPipes(new Pipe[] { new Input2CharSequence(),
            new CharSequenceRemoveHTML() });
    InstanceList list = new InstanceList(pipe);
    list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));
    for (int index = 0; index < list.size(); index++) {
        Instance inst = list.get(index);
        System.err.println(inst.getData());
    }
}