当前位置: 首页>>代码示例>>Java>>正文


Java InstanceList类代码示例

本文整理汇总了Java中cc.mallet.types.InstanceList的典型用法代码示例。如果您正苦于以下问题：Java InstanceList类的具体用法？Java InstanceList怎么用？Java InstanceList使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


InstanceList类属于cc.mallet.types包，在下文中一共展示了InstanceList类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: split

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Splits the instances held by this node into two child nodes, depending on
 * whether each instance's feature vector has a non-zero value at
 * {@code featureIndex}.
 *
 * @param fs feature selection handed unchanged to both child nodes
 * @throws IllegalStateException if this node no longer holds an instance list
 */
public void split (FeatureSelection fs)
{
	if (ilist == null) {
		throw new IllegalStateException ("Frozen.  Cannot split.");
	}
	InstanceList zeroValued = new InstanceList (ilist.getPipe());
	InstanceList nonZeroValued = new InstanceList (ilist.getPipe());
	for (int idx = 0; idx < ilist.size(); idx++) {
		Instance current = ilist.get(idx);
		FeatureVector features = (FeatureVector) current.getData ();
		// xxx Open question: what test should this be, and what to do with negative values?
		// Whatever is decided here should also go in InfoGain.calcInfoGains()
		boolean featurePresent = features.value (featureIndex) != 0;
		InstanceList destination = featurePresent ? nonZeroValued : zeroValued;
		// The instance keeps the weight it had in the parent list.
		destination.add (current, ilist.getInstanceWeight(idx));
	}
	logger.info("child0="+zeroValued.size()+" child1="+nonZeroValued.size());
	child0 = new Node (zeroValued, this, fs);
	child1 = new Node (nonZeroValued, this, fs);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:DecisionTree.java

示例2: collectConstraints

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Walks the given instance list in order, builds an unrolled graph for each
 * instance, and collects constraints from that graph together with the
 * assignment the graph reports.
 *
 * @param ilist instances to collect constraints from
 */
public void collectConstraints (InstanceList ilist)
{
  int position = 0;
  while (position < ilist.size()) {
    logger.finest ("*** Collecting constraints for instance "+position);
    Instance current = ilist.get (position);
    ACRF.UnrolledGraph graph = new ACRF.UnrolledGraph (current, templates, null, true);
    Assignment graphAssignment = graph.getAssignment ();
    collectConstraintsForGraph (graph, graphAssignment);
    position++;
  }
}
 
开发者ID:mimno,项目名称:GRMM,代码行数:11,代码来源:PseudolikelihoodACRFTrainer.java

示例3: getOptimizableCRF

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Returns the batch label-likelihood optimizable for the given training set.
 * <p>
 * When the CRF's weights-structure stamp differs from the cached one, the
 * weight dimensions are refreshed (unless {@code useNoWeights} is set) and
 * the cached optimizable is discarded. A new optimizable is built whenever
 * none is cached or the cached one was built for a different training set
 * object; in that case the threaded wrapper is rebuilt and the optimizer is
 * cleared as well.
 *
 * @param trainingSet instances the optimizable is computed over
 * @return the cached or newly created optimizable
 */
public CRFOptimizableByBatchLabelLikelihood getOptimizableCRF (InstanceList trainingSet) {
	boolean structureChanged = cachedWeightsStructureStamp != crf.weightsStructureChangeStamp;
	if (structureChanged) {
		if (!useNoWeights) {
			if (!useSparseWeights) {
				crf.setWeightsDimensionDensely ();
			} else {
				crf.setWeightsDimensionAsIn (trainingSet, useSomeUnsupportedTrick);
			}
		}
		// Force a rebuild below and remember the stamp that was just handled.
		optimizable = null;
		cachedWeightsStructureStamp = crf.weightsStructureChangeStamp;
	}

	// Reuse the cached optimizable only for the very same training set object.
	if (optimizable != null && optimizable.trainingSet == trainingSet) {
		return optimizable;
	}

	optimizable = new CRFOptimizableByBatchLabelLikelihood(crf, trainingSet, numThreads);
	optimizable.setGaussianPriorVariance(gaussianPriorVariance);
	threadedOptimizable = new ThreadedOptimizable(
			optimizable,
			trainingSet,
			crf.getParameters().getNumFactors(),
			new CRFCacheStaleIndicator(crf));
	optimizer = null;
	return optimizable;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:CRFTrainerByThreadedLabelLikelihood.java

示例4: addOrderNStates

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Adds states for every order from 0 up to and including {@code n} to the
 * CRF, then gives every state the impossible initial weight except the start
 * state returned by the CRF, which gets an initial weight of 0.0, and finally
 * sets the weight dimensions densely.
 *
 * @param n
 *            highest order to create; expected to be a positive integer.
 *            When {@code n} is negative no order array is built and
 *            {@code null} is passed to the CRF instead.
 * @param trainingInstances
 *            instances handed to {@code crf.addOrderNStates}
 */
public void addOrderNStates(int n, InstanceList trainingInstances) {

    Pattern forbiddenPat = Pattern.compile("\\s");
    Pattern allowedPat = Pattern.compile(".*");

    // Fill the primitive array directly; no boxed list or conversion helper
    // is needed to produce {0, 1, ..., n}.
    int[] ordersArray = null;
    if (n >= 0) {
        ordersArray = new int[n + 1];
        for (int i = 0; i < ordersArray.length; i++) {
            ordersArray[i] = i;
        }
    }

    String startName = this.crf.addOrderNStates(trainingInstances, ordersArray, null, "O", forbiddenPat, allowedPat,
            true);

    // Only the start state may begin a sequence: forbid all states first,
    // then re-enable the start state.
    for (int i = 0; i < this.crf.numStates(); i++) {
        this.crf.getState(i).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
    }
    this.crf.getState(startName).setInitialWeight(0.0);
    this.crf.setWeightsDimensionDensely();
}
 
开发者ID:exciteproject,项目名称:refext,代码行数:27,代码来源:ReferenceExtractorTrainer.java

示例5: gatherConstraints

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Resets the constraints and rebuilds them by running forward-backward over
 * every instance with the <i>output label sequence provided</i>, so that only
 * paths agreeing with that label sequence contribute.
 *
 * @param ilist instances (with their weights) to gather constraints from
 */
protected void gatherConstraints(InstanceList ilist) {
	logger.info("Gathering constraints...");
	assert (constraints.structureMatches(crf.parameters));
	constraints.zero();

	for (Instance instance : ilist) {
		FeatureVectorSequence input = (FeatureVectorSequence) instance.getData();
		FeatureSequence output = (FeatureSequence) instance.getTarget();
		double instanceWeight = ilist.getInstanceWeight(instance);

		// A weight of exactly 1.0 uses the plain incrementor; any other
		// weight uses the weighted variant.
		Transducer.Incrementor incrementor;
		if (instanceWeight == 1.0) {
			incrementor = constraints.new Incrementor();
		} else {
			incrementor = constraints.new WeightedIncrementor(instanceWeight);
		}

		// The lattice object itself is discarded; it is constructed only for
		// what it does with the incrementor passed in.
		new SumLatticeDefault (this.crf, input, output, incrementor);
	}
	constraints.assertNotNaNOrInfinite();
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:CRFOptimizableByBatchLabelLikelihood.java

示例6: serializePage

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/** Serialize a page without metadata. Only the minimum needed to restore
 * the page is written: the instance count, then for each instance its data,
 * target, name, source and weight. Redundant data such as pipes and
 * dictionaries is left out.
 * @param out Object output stream
 * @param page instances to write
 * @throws IOException if writing to the stream fails
 */
private void serializePage (ObjectOutputStream out, InstanceList page)
throws IOException {
    out.writeInt (page.size ());
    for (Instance inst : page) {
        serializeObject (out, inst.getData ());
        serializeObject (out, inst.getTarget ());
        out.writeObject (inst.getName ());
        out.writeObject (inst.getSource ());

        // An instance without a recorded weight is written with weight 1.0.
        Double recorded = (this.instWeights == null) ? null : this.instWeights.get (inst);
        double weightToWrite = (recorded == null) ? 1.0 : recorded;
        out.writeDouble (weightToWrite);
    }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:28,代码来源:PagedInstanceList.java

示例7: trainWithFeatureInduction

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Convenience overload that forwards every argument unchanged to the longer
 * {@code trainWithFeatureInduction} overload and supplies the string
 * {@code "exp"} as its additional trailing argument.
 *
 * @return the value returned by the longer overload
 */
public boolean trainWithFeatureInduction (InstanceList trainingData,
                                          InstanceList validationData, InstanceList testingData,
                                          TransducerEvaluator eval, int numIterations,
                                          int numIterationsBetweenFeatureInductions,
                                          int numFeatureInductions,
                                          int numFeaturesPerFeatureInduction,
                                          double trueLabelProbThreshold,
                                          boolean clusteredFeatureInduction,
                                          double[] trainingProportions)
{
	// NOTE(review): the meaning of "exp" is defined by the longer overload — confirm there.
	final String trailingDefault = "exp";
	boolean outcome = trainWithFeatureInduction (
			trainingData, validationData, testingData,
			eval, numIterations, numIterationsBetweenFeatureInductions,
			numFeatureInductions, numFeaturesPerFeatureInduction,
			trueLabelProbThreshold, clusteredFeatureInduction,
			trainingProportions, trailingDefault);
	return outcome;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:CRFTrainerByLabelLikelihood.java

示例8: Clustering

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/** Creates a clustering over the given instances.
 *
 * @param instances Instances that are clustered
 * @param numLabels Number of clusters
 * @param labels Assignment of instances to clusters; many-to-one with
 *               range [0,numLabels). The array is stored as given, not copied.
 * @throws IllegalArgumentException if the number of instances differs from
 *               the number of labels, if {@code numLabels} is less than 1, or
 *               if any label lies outside [0,numLabels)
 */
public Clustering (InstanceList instances, int numLabels, int[] labels) {
	if (instances.size() != labels.length) {
		throw new IllegalArgumentException("Instance list length does not match cluster labeling");
	}

	if (numLabels < 1) {
		throw new IllegalArgumentException("Number of labels must be strictly positive.");
	}

	for (int label : labels) {
		boolean inRange = label >= 0 && label < numLabels;
		if (!inRange) {
			throw new IllegalArgumentException("Label mapping must have range [0,numLabels).");
		}
	}

	this.instances = instances;
	this.numLabels = numLabels;
	this.labels = labels;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:Clustering.java

示例9: labelConnectionsIn

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Builds a square boolean matrix, sized by the output alphabet, in which
 * entry {@code [a][b]} is true when label {@code b} directly follows label
 * {@code a} somewhere in a target sequence of the training set.
 *
 * @param trainingSet instances whose targets are label sequences
 * @param start       optional start label; when non-null, its whole row is
 *                    marked as connected
 * @return the label-to-label connection matrix
 */
private boolean[][] labelConnectionsIn (InstanceList trainingSet, String start)
{
	int numLabels = outputAlphabet.size();
	boolean[][] connections = new boolean[numLabels][numLabels];

	for (Instance instance : trainingSet) {
		FeatureSequence labels = (FeatureSequence) instance.getTarget();
		int position = 1;
		while (position < labels.size()) {
			int fromIndex = outputAlphabet.lookupIndex (labels.get(position - 1));
			int toIndex = outputAlphabet.lookupIndex (labels.get(position));
			assert (fromIndex >= 0 && toIndex >= 0);
			connections[fromIndex][toIndex] = true;
			position++;
		}
	}

	// Without a start state there is nothing more to mark.
	if (start == null) {
		return connections;
	}

	// Handle start state: it may be followed by every label.
	int startIndex = outputAlphabet.lookupIndex (start);
	for (int target = 0; target < outputAlphabet.size(); target++) {
		connections[startIndex][target] = true;
	}
	return connections;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:CRF.java

示例10: testTokenAccuracy

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Trains a CRF with sparse weights on one half of the space-prediction data
 * and checks the token accuracy measured on the other half.
 */
public void testTokenAccuracy() {
	Pipe pipe = makeSpacePredictionPipe();

	InstanceList allInstances = new InstanceList(pipe);
	allInstances.addThruPipe(new ArrayIterator(data));

	// Fixed seed so the 50/50 split is the same on every run.
	double[] proportions = { .5, .5 };
	InstanceList[] trainAndTest = allInstances.split(new Random(777), proportions);

	CRF model = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
	model.addFullyConnectedStatesForLabels();
	CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
	trainer.setUseSparseWeights(true);

	trainer.trainIncremental(trainAndTest[0]);

	String[] descriptions = { "Train", "Test" };
	TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(trainAndTest, descriptions);
	evaluator.evaluateInstanceList(trainer, trainAndTest[1], "Test");

	assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);

}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestCRF.java

示例11: getOptimizableCRF

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
	 * Returns an optimizable CRF that contains a collection of objective functions.
	 * <p>
	 * A new one is created, and the optimizer is set to null, when none is
	 * cached or the cached one was built for a different training set object.
	 *
	 * @param trainingSet instances the optimizable CRF is built over
	 * @return the cached or newly created optimizable CRF
	 */
	public OptimizableCRF getOptimizableCRF (InstanceList trainingSet) {
		// gsc: user should call setWeightsDimensionsAsIn before the optimizable and
		// trainer objects are created. This method used to refresh the weight
		// dimensions itself whenever the CRF's weights-structure stamp changed;
		// that step is disabled here and left to the caller.
		if (ocrf != null && ocrf.trainingSet == trainingSet) {
			return ocrf;
		}
		ocrf = new OptimizableCRF (crf, trainingSet);
		opt = null;
		return ocrf;
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:CRFTrainerByValueGradients.java

示例12: computeLikelihood

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Sums, over all instances of the sample, the total lattice weight obtained
 * with the instance's label sequence minus the total lattice weight obtained
 * without any label sequence. Constraints and expectations are zeroed before
 * returning.
 *
 * @param trainingSample instances to evaluate
 * @return the accumulated difference of lattice weights
 */
private double computeLikelihood(InstanceList trainingSample) {
	double total = 0.0;
	for (Instance example : trainingSample) {
		FeatureVectorSequence inputs = (FeatureVectorSequence) example.getData();
		Sequence labels = (Sequence) example.getTarget();

		// Lattice restricted to the given labels, then the unrestricted one;
		// added and subtracted separately to keep the same rounding.
		total += new SumLatticeDefault(crf, inputs, labels, null).getTotalWeight();
		total -= new SumLatticeDefault(crf, inputs, null, null).getTotalWeight();
	}
	constraints.zero();
	expectations.zero();
	return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:CRFTrainerByStochasticGradient.java

示例13: CRFOptimizableByBatchLabelLikelihood

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Creates an optimizable over the given CRF and training set, allocating one
 * cached value, one gradient array and one expectations object per batch,
 * and then gathers the constraints from the training set.
 *
 * @param crf        model whose parameters define the factor structure
 * @param ilist      training instances
 * @param numBatches number of batches to allocate caches for
 */
public CRFOptimizableByBatchLabelLikelihood(CRF crf, InstanceList ilist, int numBatches) {
	this.crf = crf;
	this.trainingSet = ilist;
	this.numBatches = numBatches;

	// Per-batch caches.
	cachedValue = new double[numBatches];
	cachedGradient = new ArrayList<double[]>(numBatches);
	expectations = new ArrayList<CRF.Factors>(numBatches);
	int factorCount = crf.parameters.getNumFactors();
	int batch = 0;
	while (batch < numBatches) {
		cachedGradient.add(new double[factorCount]);
		expectations.add(new CRF.Factors(crf.parameters));
		batch++;
	}

	constraints = new CRF.Factors(crf.parameters);

	// Runs last so that every field above is already initialized.
	gatherConstraints(ilist);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:CRFOptimizableByBatchLabelLikelihood.java

示例14: CRFOptimizableByGE

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Creates a GE optimizable: stores the arguments, lets every constraint
 * pre-process the data, records which instances any constraint reported,
 * creates a fixed thread pool when more than one thread is requested, and
 * builds the reverse transition matrices for the CRF.
 *
 * @param crf         model to optimize
 * @param constraints GE constraints; each receives {@code map} and pre-processes {@code data}
 * @param data        instances the constraints are applied to
 * @param map         state-label map handed to every constraint
 * @param numThreads  thread count; a pool is created only when greater than 1
 * @param weight      stored as given
 */
public CRFOptimizableByGE(CRF crf, ArrayList<GEConstraint> constraints, InstanceList data, StateLabelMap map, int numThreads, double weight) {
  this.crf = crf;
  this.constraints = constraints;
  this.data = data;
  this.numThreads = numThreads;
  this.weight = weight;
  this.gpv = DEFAULT_GPV;

  // Initial cache state.
  this.cache = Integer.MAX_VALUE;
  this.cachedValue = Double.NaN;
  this.cachedGradient = new CRF.Factors(crf);

  // Union of the bit sets returned by the constraints' pre-processing.
  instancesWithConstraints = new BitSet(data.size());
  for (GEConstraint geConstraint : constraints) {
    geConstraint.setStateLabelMap(map);
    instancesWithConstraints.or(geConstraint.preProcess(data));
  }

  if (numThreads > 1) {
    this.executor = (ThreadPoolExecutor)Executors.newFixedThreadPool(numThreads);
  }

  createReverseTransitionMatrices(crf);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:26,代码来源:CRFOptimizableByGE.java

示例15: testSpaceViewer

import cc.mallet.types.InstanceList; //导入依赖的package包/类
/**
 * Trains a CRF on the first test string, runs extraction on the second one,
 * and writes the resulting extraction into {@code outputDir}, creating that
 * directory first when it does not exist.
 *
 * @throws IOException declared for the I/O performed while writing the extraction
 */
public void testSpaceViewer () throws IOException
{
  Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
  String[] trainText = { TestCRF.data[0] };
  String[] testText = { TestCRF.data[1] };

  InstanceList training = new InstanceList (pipe);
  training.addThruPipe (new ArrayIterator (trainText));
  // Not read again below; kept because it also runs the test text through the pipe.
  InstanceList testing = new InstanceList (pipe);
  testing.addThruPipe (new ArrayIterator (testText));

  CRF model = new CRF (pipe, null);
  model.addFullyConnectedStatesForLabels ();
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood (model);
  trainer.trainIncremental (training);

  CRFExtractor extractor = TestLatticeViewer.hackCrfExtor (model);
  Extraction result = extractor.extract (new ArrayIterator (testText));

  if (!outputDir.exists ()) {
    outputDir.mkdir ();
  }
  DocumentViewer.writeExtraction (outputDir, result);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:TestDocumentViewer.java


注：本文中的cc.mallet.types.InstanceList类示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。