当前位置: 首页>>代码示例>>Java>>正文


Java InstanceList.getPipe方法代码示例

本文整理汇总了Java中cc.mallet.types.InstanceList.getPipe方法的典型用法代码示例。如果您正苦于以下问题:Java InstanceList.getPipe方法的具体用法?Java InstanceList.getPipe怎么用?Java InstanceList.getPipe使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.InstanceList的用法示例。


在下文中一共展示了InstanceList.getPipe方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Learns a C4.5 decision tree from the given training instances.
 * Optionally prunes the learned tree (when m_doPruning is set), then
 * freezes it and stores it as this trainer's classifier.
 *
 * @param trainingList labeled instances to train on; must not carry a
 *        FeatureSelection (not supported yet).
 * @return the trained (and possibly pruned) C45 classifier.
 * @throws UnsupportedOperationException if the list has a FeatureSelection.
 */
public C45 train (InstanceList trainingList)
{
	// FeatureSelection is not implemented for this trainer yet.
	if (trainingList.getFeatureSelection() != null)
		throw new UnsupportedOperationException ("FeatureSelection not yet implemented.");
	C45.Node rootNode = new C45.Node(trainingList, null, m_minNumInsts);
	splitTree(rootNode, 0);
	C45 learnedTree = new C45 (trainingList.getPipe(), rootNode);
	logger.info("C45 learned: (size=" + learnedTree.getSize() + ")\n");
	learnedTree.print();
	if (m_doPruning) {
		learnedTree.prune();
		logger.info("\nPruned C45: (size=" + learnedTree.getSize() + ")\n");
		rootNode.print();
	}
	// Freeze the tree so later data cannot grow it further.
	rootNode.stopGrowth();
	this.classifier = learnedTree;
	return classifier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:C45Trainer.java

示例2: getCRF

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds a densely-weighted order-N CRF over the given training data.
 * All states are made impossible start states except the one returned by
 * {@code addOrderNStates}.
 *
 * @param training instances whose pipe defines the CRF's alphabets.
 * @param orders Markov orders of the label n-grams to model.
 * @param defaultLabel label used for unsupported transitions.
 * @param forbidden regex over label pairs that may never occur.
 * @param allowed regex over label pairs that are permitted.
 * @param connected whether to fully connect the generated states.
 * @return the configured (untrained) CRF.
 */
public static CRF getCRF(InstanceList training, int[] orders, String defaultLabel, String forbidden, String allowed, boolean connected) { 
  // Compile both regexes up front (preserves exception timing for bad patterns).
  Pattern forbiddenRegex = Pattern.compile(forbidden);
  Pattern allowedRegex = Pattern.compile(allowed);
  CRF model = new CRF(training.getPipe(), (Pipe)null);
  String startName = model.addOrderNStates(training, orders, null,
      defaultLabel, forbiddenRegex, allowedRegex, connected);
  // Only the designated start state may begin a sequence.
  for (int s = 0; s < model.numStates(); s++) {
    model.getState(s).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
  }
  model.getState(startName).setInitialWeight(0.0);
  model.setWeightsDimensionDensely();
  return model;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:13,代码来源:SimpleTaggerWithConstraints.java

示例3: testRandomTrainedOn

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Trains a MaxEnt classifier on randomly generated token sequences and
 * reports accuracy on both the (mutated-in-place) training list and a
 * freshly generated test list drawn from the same distribution.
 *
 * @param training list to be filled with random instances; mutated in place.
 * @return accuracy on the generated test set.
 */
private double testRandomTrainedOn (InstanceList training)
{
  ClassifierTrainer trainer = new MaxEntTrainer ();

  Alphabet dict = dictOfSize (3);
  String[] labels = new String[] {"class0", "class1", "class2"};

  Randoms random = new Randoms (1);
  // Fill the training list first so the RNG stream is consumed in the same
  // order as before (training draws, then testing draws).
  Iterator<Instance> trainSource = new RandomTokenSequenceIterator (random, new Dirichlet(dict, 2.0),
        30, 0, 10, 200, labels);
  training.addThruPipe (trainSource);

  InstanceList testing = new InstanceList (training.getPipe ());
  testing.addThruPipe (new RandomTokenSequenceIterator (random, new Dirichlet(dict, 2.0),
        30, 0, 10, 200, labels));

  System.out.println ("Training set size = "+training.size());
  System.out.println ("Testing set size = "+testing.size());

  Classifier model = trainer.train (training);

  System.out.println ("Accuracy on training set:");
  System.out.println (model.getClass().getName()
                        + ": " + new Trial (model, training).getAccuracy());

  System.out.println ("Accuracy on testing set:");
  double testAcc = new Trial (model, testing).getAccuracy();
  System.out.println (model.getClass().getName()
                        + ": " + testAcc);

  return testAcc;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:33,代码来源:TestPagedInstanceList.java

示例4: TokenClassifiers

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Builds token-level classifiers over the given training data.
 * Stores the trainer configuration, then immediately runs training.
 *
 * @param trainer underlying trainer used to build each classifier.
 * @param trainList instances whose pipe this classifier adopts.
 * @param randSeed seed for the cross-validation splits.
 * @param numCV number of cross-validation folds.
 */
public TokenClassifiers(ClassifierTrainer trainer, InstanceList trainList, int randSeed, int numCV)
{
	super(trainList.getPipe());

	// Record configuration before kicking off training.
	m_numCV = numCV;
	m_randSeed = randSeed;
	m_trainer = trainer;
	m_table = new HashMap();

	doTraining(trainList);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:AddClassifierTokenPredictions.java

示例5: MaxEntOptimizableByGE

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Sets up a generalized-expectation optimizable over unlabeled data.
 *
 * @param trainingList List with unlabeled training instances.
 * @param constraints Feature expectation constraints.
 * @param initClassifier Initial classifier; when null a zero-weight MaxEnt
 *        is created over the training pipe.
 */
public MaxEntOptimizableByGE(InstanceList trainingList, ArrayList<MaxEntGEConstraint> constraints, MaxEnt initClassifier) {
  this.trainingList = trainingList;
  this.constraints = constraints;

  // Default hyper-parameters.
  temperature = 1.0;
  objWeight = 1.0;
  gaussianPriorVariance = 1.0;

  int featureCount = trainingList.getDataAlphabet().size();
  int labelCount = trainingList.getTargetAlphabet().size();
  // One extra slot per label for the default (bias) feature.
  defaultFeatureIndex = featureCount;

  cachedValue = 0;
  cachedGradient = new double[(featureCount + 1) * labelCount];

  if (initClassifier == null) {
    // Start from an all-zero parameter vector shared with a fresh classifier.
    this.parameters = new double[(featureCount + 1) * labelCount];
    this.classifier = new MaxEnt(trainingList.getPipe(),parameters);
  } else {
    this.parameters = initClassifier.parameters;
    this.classifier = initClassifier;
  }

  // Let each constraint cache whatever it needs from the training data.
  for (MaxEntGEConstraint constraint : constraints) {
    constraint.preProcess(trainingList);
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:34,代码来源:MaxEntOptimizableByGE.java

示例6: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Learns a decision tree from the training instances, honoring any
 * FeatureSelection attached to the list, then freezes the tree and
 * stores it as this trainer's classifier.
 *
 * @param trainingList labeled instances to train on.
 * @return the trained DecisionTree classifier.
 */
public DecisionTree train (InstanceList trainingList) {
	FeatureSelection features = trainingList.getFeatureSelection();
	DecisionTree.Node treeRoot = new DecisionTree.Node (trainingList, null, features);
	splitTree (treeRoot, features, 0);
	// Freeze the tree so later data cannot grow it further.
	treeRoot.stopGrowth();
	finished = true;
	System.out.println ("DecisionTree learned:");
	treeRoot.print();
	this.classifier = new DecisionTree (trainingList.getPipe(), treeRoot);
	return classifier;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:12,代码来源:DecisionTreeTrainer.java

示例7: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Semi-supervised EM training of a Naive Bayes classifier.
 * Bootstraps from the labeled instances, then repeatedly (E-step) labels
 * the unlabeled instances with the current model and (M-step) retrains on
 * the combined data, until the log-likelihood change drops below 0.01%
 * after at least 10 iterations.
 *
 * @param trainingSet mix of labeled and unlabeled instances; unlabeled
 *        ones have a null labeling and are weighted by unlabeledDataWeight.
 * @return the converged NaiveBayes classifier.
 */
public NaiveBayes train (InstanceList trainingSet)
{

  // Get a classifier trained on the labeled examples only
  NaiveBayes c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
  double prevLogLikelihood = 0, logLikelihood = 0;
  boolean converged = false;

  int iteration = 0;
  while (!converged) {
    // E-step: make a new trainingSet where every instance is labeled —
    // originals keep their labels, unlabeled ones get the model's guess.
    InstanceList trainingSet2 = new InstanceList (trainingSet.getPipe());
    for (int ii = 0; ii < trainingSet.size(); ii++) {
      Instance inst = trainingSet.get(ii);
      if (inst.getLabeling() != null)
        trainingSet2.add(inst, 1.0);
      else {
        Instance inst2 = inst.shallowCopy();
        inst2.unLock();
        inst2.setLabeling(c.classify(inst).getLabeling());
        inst2.lock();
        trainingSet2.add(inst2, unlabeledDataWeight);
      }
    }
    // M-step: retrain on labeled + self-labeled data.
    c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet2);
    logLikelihood = c.dataLogLikelihood (trainingSet2);
    System.err.println ("Loglikelihood = "+logLikelihood);
    iteration++;
    // Wait for a change in log-likelihood of less than 0.01% and at least 10 iterations.
    // (Bug fix: the iteration counter was previously incremented but never
    // checked, so the documented 10-iteration minimum was not enforced.)
    if (iteration >= 10
        && Math.abs((logLikelihood - prevLogLikelihood)/logLikelihood) < 0.0001)
      converged = true;
    prevLogLikelihood = logLikelihood;
  }
  return c;    
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:36,代码来源:NaiveBayesEMTrainer.java

示例8: combineLists

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Concatenates two instance lists into a new list that uses the first
 * list's pipe. Neither input list is modified.
 *
 * @param li first list; its pipe is used for the result.
 * @param lj second list, appended after the contents of <code>li</code>.
 * @return A new {@link InstanceList} where <code>lj</code> is appended to <code>li</code>.
 */
public static InstanceList combineLists (InstanceList li,
																				 InstanceList lj) {
	InstanceList combined = new InstanceList(li.getPipe());
	for (int idx = 0; idx < li.size(); idx++) {
		combined.add(li.get(idx));
	}
	for (int idx = 0; idx < lj.size(); idx++) {
		combined.add(lj.get(idx));
	}
	return combined;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:15,代码来源:ClusterUtils.java

示例9: DMROptimizable

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Sets up the optimizable state for Dirichlet-multinomial regression.
 * Allocates (or adopts) the parameter vector, then scans the training
 * data once to warn about NaN feature values.
 *
 * @param instances training list; data alphabet supplies the features,
 *        target alphabet supplies the labels.
 * @param initialClassifier optional warm start; when non-null its
 *        parameters and default-feature index are adopted directly.
 */
public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {

		this.trainingList = instances;
		Alphabet alphabet = instances.getDataAlphabet();
		Alphabet labelAlphabet = instances.getTargetAlphabet();

		this.numLabels = labelAlphabet.size();

		// Add one feature for the "default feature".
		this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term
            
		//System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);

		this.defaultFeatureIndex = numFeatures - 1;

		this.parameters = new double [numLabels * numFeatures];

		//this.constraints = new double [numLabels * numFeatures];
		this.cachedGradient = new double [numLabels * numFeatures];

		// Warm start: adopt the existing classifier's parameter array
		// (the fresh array allocated above is discarded in this branch).
		if (initialClassifier != null) {
			this.classifier = initialClassifier;
			this.parameters = classifier.getParameters();
			this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
			assert (initialClassifier.getInstancePipe() == instances.getPipe());
		}
		else if (this.classifier == null) {
			// Cold start: wrap the zero-initialized parameters in a new MaxEnt
			// (the classifier aliases this.parameters, so updates are shared).
			this.classifier =
				new MaxEnt (instances.getPipe(), parameters);
		}

		formatter = new DecimalFormat("0.###E0");

		// Cached objective value/gradient must be recomputed on first use.
		cachedValueStale = true;
		cachedGradientStale = true;

		// Initialize the constraints

		logger.fine("Number of instances in training list = " + trainingList.size());

		// One pass over the data purely to log NaN feature values;
		// instances with a null target are skipped.
		for (Instance instance : trainingList) {
			FeatureVector multinomialValues = (FeatureVector) instance.getTarget();

			if (multinomialValues == null)
				continue;

			FeatureVector features = (FeatureVector) instance.getData();
			assert (features.getAlphabet() == alphabet);

			boolean hasNaN = false;

			for (int i = 0; i < features.numLocations(); i++) {
				if (Double.isNaN(features.valueAtLocation(i))) {
					logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString()); 
					hasNaN = true;
				}
			}

			if (hasNaN) {
				logger.info("NaN in instance: " + instance.getName());
			}

		}

		//TestMaximizable.testValueAndGradientCurrentParameters (this);
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:67,代码来源:DMROptimizable.java

示例10: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
	 * Trains winnow on the instance list, updating 
	 * {@link #weights weights} according to errors.
	 * One pass is made over the data: for each instance, every label whose
	 * summed feature weight exceeds the threshold theta is "guessed", and
	 * weights are promoted/demoted on incorrect guesses.
	 * @param trainingList Instance list to be trained on
	 * @return Classifier object containing learned weights
	 */
	public Winnow train (InstanceList trainingList)
	{
		FeatureSelection selectedFeatures = trainingList.getFeatureSelection();
		if (selectedFeatures != null)
			// xxx Attend to FeatureSelection!!!
			throw new UnsupportedOperationException ("FeatureSelection not yet implemented.");
		// if "train" is run more than once, 
		// we will be reinitializing the weights
		// TODO: provide method to save weights
		trainingList.getDataAlphabet().stopGrowth();
		trainingList.getTargetAlphabet().stopGrowth();
		Pipe dataPipe = trainingList.getPipe ();
		Alphabet dict = (Alphabet) trainingList.getDataAlphabet ();
		int numLabels = trainingList.getTargetAlphabet().size();
		int numFeats = dict.size(); 
		// Decision threshold scales with the number of features.
		this.theta =  numFeats * this.nfactor;
		this.weights = new double [numLabels][numFeats];
		// init weights to 1
		for(int i=0; i<numLabels; i++)
			for(int j=0; j<numFeats; j++)
				this.weights[i][j] = 1.0;
		//System.out.println("Init weights to 1.  Theta= "+theta);
		// loop through all instances
		for (int ii = 0; ii < trainingList.size(); ii++){
			Instance inst = (Instance) trainingList.get(ii);
			Labeling labeling = inst.getLabeling ();
			FeatureVector fv = (FeatureVector) inst.getData ();
			double[] results = new double [numLabels]; 
			int fvisize = fv.numLocations();
			int correctIndex = labeling.getBestIndex();
			
			for(int rpos=0; rpos < numLabels; rpos++)
		    results[rpos]=0;
			// sum up xi*wi for each class
			for(int fvi=0; fvi < fvisize; fvi++){
				int fi = fv.indexAtLocation(fvi);
				//System.out.println("feature index "+fi);
				for(int lpos=0; lpos < numLabels; lpos++)
			    results[lpos] += this.weights[lpos][fi];
			}
			//System.out.println("In instance " + ii);
			// make guess for each label using threshold
			// update weights according to alpha and beta 
			// upon incorrect guess
			for(int ri=0; ri < numLabels; ri++){
				if(results[ri] > this.theta){ // guess 1
					if(correctIndex != ri) // correct is 0
				    demote(ri, fv);
				}
				else{ // guess 0
					if(correctIndex == ri) // correct is 1
						promote(ri, fv);   
				}
			}
//			System.out.println("Results guessed:")
//		for(int x=0; x<numLabels; x++)
//		    System.out.println(results[x]);
//			System.out.println("Correct label: "+correctIndex );
//			System.out.println("Weights are");
//			for(int h=0; h<numLabels; h++){
//				for(int g=0; g<numFeats; g++)
//			    System.out.println(weights[h][g]);
//				System.out.println("");
//			}
		}
		// Package the learned weights into a Winnow classifier.
		classifier = new Winnow (dataPipe, weights, theta, numLabels, numFeats);
		return classifier;
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:75,代码来源:WinnowTrainer.java

示例11: setup

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Lazily initializes (or validates) the trainer's alphabets, pipe, and
 * per-label Multinomial estimators, growing the estimator array if the
 * target alphabet has gained labels since the last call.
 * At least one of the two arguments must be non-null; when only a list is
 * given, its first instance supplies the alphabets.
 *
 * @param instances training list (may be null if instance is given).
 * @param instance single instance (may be null if instances is given).
 * @throws IllegalArgumentException if alphabets or pipe disagree with
 *         those already held by this trainer.
 */
private void setup (InstanceList instances, Instance instance) {
	assert (instances != null || instance != null);
	if (instance == null && instances != null)
		instance = instances.get(0);
	// Initialize the alphabets
	if (dataAlphabet == null) {
		this.dataAlphabet = instance.getDataAlphabet();
		this.targetAlphabet = instance.getTargetAlphabet();
	}	else if (!Alphabet.alphabetsMatch(instance, this))
		// Make sure the alphabets match 
		throw new IllegalArgumentException ("Training set alphabets do not match those of NaiveBayesTrainer.");

	// Initialize or check the instancePipe
	if (instances != null) {
		if (instancePipe == null)
			instancePipe = instances.getPipe();
		else if (instancePipe != instances.getPipe())
			// Make sure that this pipes match.  Is this really necessary??  
			// I don't think so, but it could be confusing to have each returned classifier have a different pipe?  -akm 1/08
			throw new IllegalArgumentException ("Training set pipe does not match that of NaiveBayesTrainer.");
	}
	
	// First call: one feature estimator per label, plus a prior estimator.
	if (me == null) {
		int numLabels = targetAlphabet.size();
		me = new Multinomial.Estimator[numLabels];
		for (int i = 0; i < numLabels; i++) {
			me[i] = (Multinomial.Estimator) featureEstimator.clone();
			me[i].setAlphabet(dataAlphabet);
		}
		pe = (Multinomial.Estimator) priorEstimator.clone();
	}
	
  if (targetAlphabet.size() > me.length) {
    // target alphabet grew. increase size of our multinomial array
    int targetAlphabetSize = targetAlphabet.size();
    // copy over old values
    Multinomial.Estimator[] newMe = new Multinomial.Estimator[targetAlphabetSize];
    System.arraycopy (me, 0, newMe, 0, me.length);
    // initialize new expanded space
    for (int i= me.length; i<targetAlphabetSize; i++){
      Multinomial.Estimator mest = (Multinomial.Estimator)featureEstimator.clone ();
      mest.setAlphabet (dataAlphabet);
      newMe[i] = mest;
    }
    me = newMe;
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:48,代码来源:NaiveBayesTrainer.java


注:本文中的cc.mallet.types.InstanceList.getPipe方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。