当前位置: 首页>>代码示例>>Java>>正文


Java InstanceList.get方法代码示例

本文整理汇总了Java中cc.mallet.types.InstanceList.get方法的典型用法代码示例。如果您正苦于以下问题：Java InstanceList.get方法的具体用法？Java InstanceList.get怎么用？Java InstanceList.get使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cc.mallet.types.InstanceList的用法示例。


在下文中一共展示了InstanceList.get方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: predict

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Labels every instance in {@code testing} with the best output sequence reported by a
 * {@code MaxLatticeDefault} built over this model.
 *
 * <p>Before decoding, the list receives this model's global feature selection and every
 * registered feature inducer is applied to it. The method is kept as a reminder to do
 * something about {@code induceFeaturesFor()}.
 *
 * @param testing instances whose data and target are both {@code Sequence}s
 * @return one predicted sequence per instance, in list order
 * @deprecated This method is deprecated.
 */
@Deprecated
public Sequence[] predict (InstanceList testing) {
	testing.setFeatureSelection(this.globalFeatureSelection);
	for (int k = 0; k < featureInducers.size(); k++) {
		((FeatureInducer) featureInducers.get(k)).induceFeaturesFor (testing, false, false);
	}
	final int numInstances = testing.size();
	Sequence[] predictions = new Sequence[numInstances];
	for (int idx = 0; idx < numInstances; idx++) {
		Instance current = testing.get(idx);
		Sequence observed = (Sequence) current.getData();
		Sequence gold = (Sequence) current.getTarget();
		// Input and gold label sequences are expected to be aligned position by position.
		assert (observed.size() == gold.size());
		predictions[idx] = new MaxLatticeDefault(this, observed).bestOutputSequence();
		assert (predictions[idx].size() == gold.size());
	}
	return predictions;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:CRF.java

示例2: computeLikelihood

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Accumulates, over every instance in the sample, the total weight of the lattice built
 * with the instance's label sequence minus the total weight of the lattice built without one.
 *
 * <p>Side effect: {@code constraints} and {@code expectations} are zeroed before returning.
 *
 * @param trainingSample instances whose data is a {@code FeatureVectorSequence} and whose
 *        target is a {@code Sequence}
 * @return the accumulated difference of lattice weights
 */
private double computeLikelihood(InstanceList trainingSample) {
	double total = 0.0;
	for (int idx = 0; idx < trainingSample.size(); idx++) {
		Instance current = trainingSample.get(idx);
		FeatureVectorSequence features = (FeatureVectorSequence) current.getData();
		Sequence labels = (Sequence) current.getTarget();
		// Lattice restricted to the given label sequence.
		double labeledWeight = new SumLatticeDefault(crf, features, labels, null).getTotalWeight();
		total += labeledWeight;
		// Lattice with no label sequence supplied.
		double unlabeledWeight = new SumLatticeDefault(crf, features, null, null).getTotalWeight();
		total -= unlabeledWeight;
	}
	constraints.zero();
	expectations.zero();
	return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:CRFTrainerByStochasticGradient.java

示例3: evaluateInstanceList

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Computes per-instance accuracy: an instance counts as correct only when
 * {@code sequencesMatch} accepts the true and the transduced output sequences.
 *
 * <p>The resulting accuracy is stored in {@code accuracy} under {@code description} and
 * written to the logger.
 *
 * @param tt trainer whose transducer produces the predictions
 * @param data instances whose data and target are both {@code Sequence}s
 * @param description key under which the accuracy is stored, and prefix of the log line
 */
public void evaluateInstanceList (TransducerTrainer tt, InstanceList data, String description)
{
  final int numInstances = data.size();
  int numCorrect = 0;
  for (int idx = 0; idx < numInstances; idx++) {
    Instance current = data.get(idx);
    Sequence observed = (Sequence) current.getData();
    Sequence gold = (Sequence) current.getTarget();
    assert (observed.size() == gold.size());
    Sequence predicted = tt.getTransducer().transduce (observed);
    assert (predicted.size() == gold.size());
    if (sequencesMatch (gold, predicted)) {
      numCorrect++;
    }
  }
  double perInstanceAccuracy = (double) numCorrect / numInstances;
  accuracy.put(description, perInstanceAccuracy);

  logger.info (description+" Num instances = "+numInstances+"  Num correct = "+numCorrect+" Per-instance accuracy = "+perInstanceAccuracy);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:InstanceAccuracyEvaluator.java

示例4: printInstanceLists

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Dumps to standard output, for every state of {@code memm}, the state's name followed by
 * each instance of its training set (target and data), or "No data" when the state has no
 * training set.
 */
public void printInstanceLists ()
{
	for (int stateIdx = 0; stateIdx < memm.numStates(); stateIdx++) {
		State state = (State) memm.getState (stateIdx);
		InstanceList stateData = state.trainingSet;
		System.out.println ("State "+stateIdx+" : "+state.getName());
		if (stateData != null) {
			for (int instIdx = 0; instIdx < stateData.size(); instIdx++) {
				Instance current = stateData.get (instIdx);
				System.out.println ("From : "+state.getName()+" To : "+current.getTarget());
				System.out.println ("Instance "+instIdx);
				System.out.println (current.getTarget());
				System.out.println (current.getData());
			}
		} else {
			System.out.println ("No data");
		}
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:MEMMTrainer.java

示例5: collectConstraintsForInstance

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Adds the contribution of a single instance to {@code constraints} and
 * {@code defaultConstraints}.
 *
 * <p>The instance is unrolled into a graph; for each unrolled variable set whose template
 * index is not -1, the set's feature vector is added to the constraint entry selected by
 * template index and assignment number. The default constraint for that assignment is
 * incremented by 1.0 only when {@code location(assn)} is not -1.
 *
 * @param ilist list holding the instance
 * @param inum index of the instance within {@code ilist}
 */
private void collectConstraintsForInstance (InstanceList ilist, int inum)
{
  ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph (ilist.get (inum), templates, null, false);
  Iterator varSets = graph.unrolledVarSetIterator ();
  while (varSets.hasNext ()) {
    ACRF.UnrolledVarSet varSet = (ACRF.UnrolledVarSet) varSets.next ();
    int templateIdx = varSet.getTemplate ().index;
    if (templateIdx != -1) {
      int assignment = varSet.lookupAssignmentNumber ();
      constraints [templateIdx][assignment].plusEqualsSparse (varSet.getFv ());
      if (defaultConstraints [templateIdx].location (assignment) != -1) {
        defaultConstraints [templateIdx].incrementValue (assignment, 1.0);
      }
    }
  }
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:16,代码来源:PiecewiseACRFTrainer.java

示例6: findOutMode

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Determines the "mode" of the learning problem together with one detail parameter.
 *
 * <p>The mode (entry key) is one of:
 * <ul>
 *   <li>{@code "classind"}: predict the index of a class</li>
 *   <li>{@code "classcosts"}: targets are vectors of class costs</li>
 *   <li>{@code "regr"}: regression</li>
 * </ul>
 * The detail parameter (entry value) is the number of class indices for
 * {@code classind} and {@code classcosts}, 0 as a dummy value for {@code regr}, and -1 for
 * {@code classind} when no instances are available.
 *
 * @param crm corpus representation providing the instances and the pipe
 * @return an entry of (mode, number of classes)
 */
protected AbstractMap.SimpleEntry<String,Integer> findOutMode(CorpusRepresentationMalletTarget crm)  {
  InstanceList instances = crm.getRepresentationMallet();
  Alphabet targetAlphabet = crm.getPipe().getTargetAlphabet();

  // Without a target alphabet this is treated as regression.
  if (targetAlphabet == null) {
    return new AbstractMap.SimpleEntry<String, Integer>("regr", 0);
  }

  // When invoked for training there should be a first instance, but at application time
  // there are no instances yet. In that case dummy values are used, since this information
  // is not needed at application time for now. Should it ever be needed, it would have to
  // be stored in the pipe.
  if (instances == null || instances.isEmpty()) {
    return new AbstractMap.SimpleEntry<String, Integer>("classind", -1);
  }

  // The target of the first instance decides between class costs and class indices.
  Object firstTarget = instances.get(0).getTarget();
  if (firstTarget instanceof NominalTargetWithCosts) {
    int numCosts = ((NominalTargetWithCosts) firstTarget).getCosts().length;
    return new AbstractMap.SimpleEntry<String, Integer>("classcosts", numCosts);
  }
  return new AbstractMap.SimpleEntry<String, Integer>("classind", targetAlphabet.size());
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:39,代码来源:EngineMBPythonNetworksBase.java

示例7: getFromMallet

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Create libsvm representation from Mallet.
 *
 * <p>Each Mallet instance becomes one row of the returned problem. A {@code Label} target
 * is converted to its index and a {@code Double} target is used directly. Features are
 * read through the vector's location accessors, so vectors without an explicit index
 * array or without an explicit value array are converted too, instead of failing with a
 * {@code NullPointerException} on the raw arrays.
 *
 * @param crm the Mallet corpus representation whose instances are converted
 * @return the libsvm problem holding one target and one node array per instance
 * @throws GateRuntimeException if a target is neither a {@code Label} nor a {@code Double}
 */
public static svm_problem getFromMallet(CorpusRepresentationMallet crm) {
  InstanceList instances = crm.getRepresentationMallet();
  svm_problem prob = new svm_problem();
  int numTrainingInstances = instances.size();
  prob.l = numTrainingInstances;
  prob.y = new double[prob.l];
  prob.x = new svm_node[prob.l][];

  for (int i = 0; i < numTrainingInstances; i++) {
    Instance instance = instances.get(i);

    //Labels
    // convert the target: if we get a label, convert to index,
    // if we get a double, use it directly
    Object tobj = instance.getTarget();
    if (tobj instanceof Label) {
      prob.y[i] = ((Label) tobj).getIndex();
    } else if (tobj instanceof Double) {
      prob.y[i] = ((Double) tobj).doubleValue();
    } else {
      throw new GateRuntimeException("Odd target in mallet instance, cannot convert to LIBSVM: " + tobj);
    }

    //Features
    // Use the location accessors rather than getIndices()/getValues(): the raw arrays
    // may be null (no index array, or no value array), while the accessors
    // supply the implied index or value for every stored location.
    SparseVector data = (SparseVector) instance.getData();
    int numLocations = data.numLocations();
    prob.x[i] = new svm_node[numLocations];
    for (int loc = 0; loc < numLocations; loc++) {
      svm_node node = new svm_node();
      node.index = data.indexAtLocation(loc) + 1; // NOTE: LibSVM location indices have to start with 1
      node.value = data.valueAtLocation(loc);
      prob.x[i][loc] = node;
    }
  }
  return prob;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:44,代码来源:CorpusRepresentationLibSVM.java

示例8: collectConstraints

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Collects constraints for every instance of the list: each instance is unrolled into a
 * graph, and the graph together with its assignment is handed to
 * {@code collectConstraintsForGraph}.
 *
 * @param ilist the instances to collect constraints from
 */
public void collectConstraints (InstanceList ilist)
{
  for (int idx = 0; idx < ilist.size (); idx++) {
    logger.finest ("*** Collecting constraints for instance "+idx);
    ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph (ilist.get (idx), templates, null, true);
    Assignment graphAssignment = graph.getAssignment ();
    collectConstraintsForGraph (graph, graphAssignment);
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:11,代码来源:PseudolikelihoodACRFTrainer.java

示例9: mergeInstancesWithSameLabel

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Merges the clusters of any two instances that sit in different clusters but have equal
 * labelings, and returns the resulting clustering.
 *
 * @param clustering the clustering to start from
 * @return the clustering produced by the successive {@code mergeClusters} calls
 */
public static Clustering mergeInstancesWithSameLabel (Clustering clustering) {
	InstanceList instances = clustering.getInstances();
	for (int first = 0; first < instances.size(); first++) {
		Instance firstInstance = instances.get(first);
		// NOTE(review): read once per outer iteration and not refreshed after a merge;
		// confirm this is intended given what mergeClusters does to cluster labels.
		int firstCluster = clustering.getLabel(first);
		for (int second = first + 1; second < instances.size(); second++) {
			Instance secondInstance = instances.get(second);
			int secondCluster = clustering.getLabel(second);
			if (firstCluster == secondCluster) {
				continue; // already in the same cluster
			}
			if (firstInstance.getLabeling().equals(secondInstance.getLabeling())) {
				clustering = ClusterUtils.mergeClusters(clustering, firstCluster, secondCluster);
			}
		}
	}
	return clustering;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:ClusterUtils.java

示例10: evaluateInstanceList

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Logs, for every entry of the target alphabet, the fraction of sequence positions at
 * which it was predicted and the fraction at which it is the true label.
 *
 * <p>Positions are counted over the length of each predicted sequence; fractions are
 * formatted with at most four fraction digits.
 *
 * @param transducer trainer whose transducer produces the predictions
 * @param instances instances whose data and target are both {@code Sequence}s
 * @param description prefix for every log line
 */
@Override
public void evaluateInstanceList(TransducerTrainer transducer,
    InstanceList instances, String description) {
  final int numLabels = instances.getTargetAlphabet().size();
  double[] predictedCounts = new double[numLabels];
  double[] goldCounts = new double[numLabels];

  int positions = 0;
  for (int idx = 0; idx < instances.size(); idx++) {
    Instance current = instances.get(idx);
    Sequence gold = (Sequence) current.getTarget();
    Sequence predicted = (Sequence) transducer.getTransducer().transduce((Sequence) current.getData());
    for (int pos = 0; pos < predicted.size(); pos++) {
      positions++;
      predictedCounts[instances.getTargetAlphabet().lookupIndex(predicted.get(pos))]++;
      goldCounts[instances.getTargetAlphabet().lookupIndex(gold.get(pos))]++;
    }
  }

  NumberFormat fractionFormat = NumberFormat.getInstance();
  fractionFormat.setMaximumFractionDigits(4);

  for (int label = 0; label < predictedCounts.length; label++) {
    double predictedShare = predictedCounts[label] / positions;
    double goldShare = goldCounts[label] / positions;
    logger.info(description + " " + instances.getTargetAlphabet().lookupObject(label) + " predicted: " + fractionFormat.format(predictedShare) + " - true: " + fractionFormat.format(goldShare));
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:LabelDistributionEvaluator.java

示例11: getFeatureLabelCounts

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a feature-by-label table of counts, where each instance contributes its
 * labeling's value for a label times the feature's weight.
 *
 * @param list instances whose data is a {@code FeatureVector}
 * @param useValues if true, weight each feature by its value in the vector; otherwise
 *        every present feature counts as 1.0
 * @return a matrix indexed as {@code [featureIndex][labelIndex]}
 */
public static double[][] getFeatureLabelCounts(InstanceList list, boolean useValues) {
  final int featureCount = list.getDataAlphabet().size();
  final int labelCount = list.getTargetAlphabet().size();
  double[][] counts = new double[featureCount][labelCount];

  for (int idx = 0; idx < list.size(); idx++) {
    Instance current = list.get(idx);
    FeatureVector features = (FeatureVector) current.getData();

    // Iterating over all labels handles distributions over labels.
    for (int label = 0; label < labelCount; label++) {
      double labelWeight = current.getLabeling().value(label);
      for (int loc = 0; loc < features.numLocations(); loc++) {
        int feature = features.indexAtLocation(loc);
        double featureWeight = useValues ? features.valueAtLocation(loc) : 1.0;
        counts[feature][label] += labelWeight * featureWeight;
      }
    }
  }
  return counts;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:29,代码来源:FeatureConstraintUtil.java

示例12: testXis

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Checks that, at every position but the last and for every state, the xi probabilities
 * over the state's outgoing transitions sum to the state's gamma probability (within 1e-5).
 */
public void testXis() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList trainingData = new InstanceList(pipe);
	trainingData.addThruPipe(new ArrayIterator(data));

	CRF model = new CRF(pipe, null);
	model.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	trainer.train(trainingData, 10); // Let's get some parameters

	Instance first = trainingData.get(0);
	Sequence input = (Sequence) first.getData();
	Sequence target = (Sequence) first.getTarget();
	SumLatticeDefault lattice = new SumLatticeDefault(model, input, target, null, true);

	final int lastPosition = lattice.length() - 1;
	for (int pos = 0; pos < lastPosition; pos++) {
		for (int stateIdx = 0; stateIdx < model.numStates(); stateIdx++) {
			Transducer.State source = model.getState(stateIdx);
			Transducer.TransitionIterator transitions = source.transitionIterator(input, pos);
			double gamma = lattice.getGammaProbability(pos, source);
			double xiSum = 0;
			while (transitions.hasNext()) {
				Transducer.State destination = transitions.nextState();
				xiSum += lattice.getXiProbability(pos, source, destination);
			}
			assertEquals(gamma, xiSum, 1e-5);
		}
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:33,代码来源:TestCRF.java

示例13: createExtractionFrom

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds an {@code Extraction} that pairs, document by document, the label sequences
 * obtained from {@code predStrings} with those obtained from {@code trueStrings}.
 *
 * <p>Both arrays are run through the same pipe; the i-th documents of the two lists are
 * combined into a {@code DocumentExtraction} named {@code "TEST" + i}.
 *
 * @param predStrings SGML-marked strings acting as the predictions
 * @param trueStrings SGML-marked strings acting as the true labels
 * @return the extraction holding one document extraction per predicted string
 */
private Extraction createExtractionFrom (String[] predStrings, String[] trueStrings)
{
  Pipe[] stages = new Pipe[] {
    new SGML2TokenSequence (new CharSequenceLexer (CharSequenceLexer.LEX_NONWHITESPACE_CLASSES), "O"),
    new Target2LabelSequence (),
    new PrintInputAndTarget (),
  };
  Pipe pipe = new SerialPipes (stages);

  InstanceList predicted = new InstanceList (pipe);
  predicted.addThruPipe (new ArrayIterator (predStrings));

  InstanceList gold = new InstanceList (pipe);
  gold.addThruPipe (new ArrayIterator (trueStrings));

  LabelAlphabet labels = (LabelAlphabet) pipe.getTargetAlphabet ();
  Extraction result = new Extraction (null, labels);

  for (int doc = 0; doc < predicted.size (); doc++) {
    Instance predictedInstance = predicted.get (doc);
    Instance goldInstance = gold.get (doc);
    Tokenization tokens = (Tokenization) predictedInstance.getData ();
    Sequence predictedSeq = (Sequence) predictedInstance.getTarget ();
    Sequence goldSeq = (Sequence) goldInstance.getTarget ();
    result.addDocumentExtraction (
        new DocumentExtraction ("TEST"+doc, labels, tokens, predictedSeq, goldSeq, "O"));
  }

  return result;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:30,代码来源:TestPerDocumentF1Evaluator.java

示例14: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a NaiveBayes classifier iteratively: instances without a labeling are labeled by
 * the current classifier, and a new classifier is trained on the result, until the
 * relative change in data log-likelihood drops below 0.0001.
 *
 * <p>Instances that already have a labeling are added with weight 1.0; relabeled copies
 * are added with weight {@code unlabeledDataWeight}.
 *
 * @param trainingSet instances, some of which may have a null labeling
 * @return the classifier from the last iteration
 */
public NaiveBayes train (InstanceList trainingSet)
{
  // Get a classifier trained on the labeled examples only
  NaiveBayes classifier = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
  double previousLogLikelihood = 0;

  while (true) {
    // Make a new training set in which the unlabeled instances carry the current guesses
    InstanceList relabeled = new InstanceList (trainingSet.getPipe());
    for (int idx = 0; idx < trainingSet.size(); idx++) {
      Instance original = trainingSet.get(idx);
      if (original.getLabeling() == null) {
        // Work on an unlocked copy so the caller's instance keeps its missing labeling.
        Instance guessed = original.shallowCopy();
        guessed.unLock();
        guessed.setLabeling(classifier.classify(original).getLabeling());
        guessed.lock();
        relabeled.add(guessed, unlabeledDataWeight);
      } else {
        relabeled.add(original, 1.0);
      }
    }

    classifier = (NaiveBayes) nbTrainer.newClassifierTrainer().train (relabeled);
    double currentLogLikelihood = classifier.dataLogLikelihood (relabeled);
    System.err.println ("Loglikelihood = "+currentLogLikelihood);

    // Stop once the log-likelihood changes by less than 0.01% between iterations.
    // NOTE(review): the original comment also mentioned "at least 10 iterations", but no
    // minimum iteration count is enforced here -- confirm which is intended.
    if (Math.abs((currentLogLikelihood - previousLogLikelihood)/currentLogLikelihood) < 0.0001) {
      return classifier;
    }
    previousLogLikelihood = currentLogLikelihood;
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:36,代码来源:NaiveBayesEMTrainer.java

示例15: main

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Runs every file found under the directory given as the first argument through a pipe
 * that ends in {@code CharSequenceRemoveHTML}, and prints each resulting instance's data
 * to standard error.
 *
 * @param args {@code args[0]} is the starting directory of the input files
 */
public static void main(String[] args) {
	String htmldir = args[0];
	Pipe[] stages = new Pipe[] { new Input2CharSequence(), new CharSequenceRemoveHTML() };
	Pipe pipe = new SerialPipes(stages);

	InstanceList list = new InstanceList(pipe);
	list.addThruPipe(new FileIterator(htmldir, FileIterator.STARTING_DIRECTORIES));

	final int count = list.size();
	for (int pos = 0; pos < count; pos++) {
		System.err.println(list.get(pos).getData());
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:14,代码来源:CharSequenceRemoveHTML.java


注:本文中的cc.mallet.types.InstanceList.get方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。