当前位置: 首页>>代码示例>>Java>>正文


Java InstanceList.getDataAlphabet方法代码示例

本文整理汇总了Java中cc.mallet.types.InstanceList.getDataAlphabet方法的典型用法代码示例。如果您正苦于以下问题:Java InstanceList.getDataAlphabet方法的具体用法?Java InstanceList.getDataAlphabet怎么用?Java InstanceList.getDataAlphabet使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cc.mallet.types.InstanceList的用法示例。


在下文中一共展示了InstanceList.getDataAlphabet方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: addFullyConnectedStatesForThreeQuarterLabels

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
public void addFullyConnectedStatesForThreeQuarterLabels (InstanceList trainingSet)
{
	// One state per label; every state has a transition to every state.
	int labelCount = outputAlphabet.size();
	for (int src = 0; src < labelCount; src++) {
		String sourceName = (String) outputAlphabet.lookupObject (src);
		String[] destNames = new String[labelCount];
		String[][] weightNamesPerDest = new String[labelCount][];
		for (int dst = 0; dst < labelCount; dst++) {
			String destName = (String) outputAlphabet.lookupObject (dst);
			destNames[dst] = destName;
			// Two weight sets per transition: the "half-label" weights
			// (all observational tests, keyed by the destination label) and
			// the per-transition weights (default feature only).
			String transitionName = sourceName + "->" + destName;
			weightNamesPerDest[dst] = new String[] { destName, transitionName };
			int weightIndex = getWeightsIndex (transitionName);
			// A new empty FeatureSelection admits no features, so the
			// transition weights end up carrying only the default feature.
			featureSelections[weightIndex] = new FeatureSelection (trainingSet.getDataAlphabet ());
		}
		addState (sourceName, 0.0, 0.0, destNames, destNames, weightNamesPerDest);
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:25,代码来源:CRF.java

示例2: addStatesForThreeQuarterLabelsConnectedAsIn

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Add as many states as there are labels, but don't create separate
 * observational-test-weights for each source-destination pair of
 * states---instead have all the incoming transitions to a state share the
 * same observational-feature-test weights. However, do create separate
 * default feature for each transition, (which acts as an HMM-style transition
 * probability). Only transitions observed in {@code trainingSet} are added.
 */
public void addStatesForThreeQuarterLabelsConnectedAsIn (InstanceList trainingSet)
{
	int labelCount = outputAlphabet.size();
	boolean[][] connected = labelConnectionsIn (trainingSet);
	for (int src = 0; src < labelCount; src++) {
		String sourceName = (String) outputAlphabet.lookupObject (src);
		// Count destinations first so the arrays can be sized exactly.
		int destCount = 0;
		for (int dst = 0; dst < labelCount; dst++)
			if (connected[src][dst])
				destCount++;
		String[] destNames = new String[destCount];
		String[][] weightNamesPerDest = new String[destCount][];
		int slot = 0;
		for (int dst = 0; dst < labelCount; dst++) {
			if (!connected[src][dst])
				continue;
			String destName = (String) outputAlphabet.lookupObject (dst);
			destNames[slot] = destName;
			// Slot 0: observational-test weights shared by all transitions
			// into this destination; slot 1: this transition's own weights.
			String transitionName = sourceName + "->" + destName;
			weightNamesPerDest[slot] = new String[] { destName, transitionName };
			int weightIndex = getWeightsIndex (transitionName);
			// An empty FeatureSelection admits no features, so the
			// transition weights carry only the default feature.
			featureSelections[weightIndex] = new FeatureSelection (trainingSet.getDataAlphabet ());
			slot++;
		}
		addState (sourceName, 0.0, 0.0, destNames, destNames, weightNamesPerDest);
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:40,代码来源:CRF.java

示例3: Node

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
public Node (InstanceList ilist, Node parent, FeatureSelection fs)
{
	// Pick the split feature with the highest information gain among those
	// permitted by the given feature selection.
	InfoGain gain = new InfoGain (ilist);
	this.featureIndex = gain.getMaxValuedIndexIn (fs);
	this.infoGain = gain.value (featureIndex);
	this.ilist = ilist;
	this.dictionary = ilist.getDataAlphabet ();
	this.parent = parent;
	this.labeling = gain.getBaseLabelDistribution ();
	this.labelEntropy = gain.getBaseEntropy ();
	// Node starts as a leaf; children are attached when it is split.
	this.child0 = null;
	this.child1 = null;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:13,代码来源:DecisionTree.java

示例4: DMROptimizable

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
 * Wraps a training set and an optional warm-start classifier as an
 * optimizable objective.
 *
 * @param instances training data; its data alphabet supplies the features
 *        and its target alphabet supplies the labels
 * @param initialClassifier optional warm start; when non-null its parameters
 *        and default-feature index replace the freshly allocated ones.
 *        May be null.
 */
public DMROptimizable (InstanceList instances, MaxEnt initialClassifier) {

		this.trainingList = instances;
		Alphabet alphabet = instances.getDataAlphabet();
		Alphabet labelAlphabet = instances.getTargetAlphabet();

		this.numLabels = labelAlphabet.size();

		// Add one feature for the "default feature".
		this.numFeatures = alphabet.size() + 1; // add a spot for the intercept term
            
		//System.out.println("num features: " + numFeatures + " numLabels: " + numLabels);

		// The intercept occupies the last slot of each label's parameter row.
		this.defaultFeatureIndex = numFeatures - 1;

		// Parameters are laid out as a flat [numLabels x numFeatures] array.
		this.parameters = new double [numLabels * numFeatures];

		//this.constraints = new double [numLabels * numFeatures];
		this.cachedGradient = new double [numLabels * numFeatures];

		// Warm start: adopt the existing classifier's parameter array and
		// intercept index instead of the ones allocated above.
		if (initialClassifier != null) {
			this.classifier = initialClassifier;
			this.parameters = classifier.getParameters();
			this.defaultFeatureIndex = classifier.getDefaultFeatureIndex();
			assert (initialClassifier.getInstancePipe() == instances.getPipe());
		}
		else if (this.classifier == null) {
			this.classifier =
				new MaxEnt (instances.getPipe(), parameters);
		}

		formatter = new DecimalFormat("0.###E0");

		// Force recomputation of value and gradient on first request.
		cachedValueStale = true;
		cachedGradientStale = true;

		// Initialize the constraints

		logger.fine("Number of instances in training list = " + trainingList.size());

		// Sanity-scan the training data: log any NaN feature values.
		// Instances without a multinomial target are skipped.
		for (Instance instance : trainingList) {
			FeatureVector multinomialValues = (FeatureVector) instance.getTarget();

			if (multinomialValues == null)
				continue;

			FeatureVector features = (FeatureVector) instance.getData();
			assert (features.getAlphabet() == alphabet);

			boolean hasNaN = false;

			for (int i = 0; i < features.numLocations(); i++) {
				if (Double.isNaN(features.valueAtLocation(i))) {
					logger.info("NaN for feature " + alphabet.lookupObject(features.indexAtLocation(i)).toString()); 
					hasNaN = true;
				}
			}

			if (hasNaN) {
				logger.info("NaN in instance: " + instance.getName());
			}

		}

		//TestMaximizable.testValueAndGradientCurrentParameters (this);
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:67,代码来源:DMROptimizable.java

示例5: train

import cc.mallet.types.InstanceList; //导入方法依赖的package包/类
/**
	 * Trains winnow on the instance list, updating 
	 * {@link #weights weights} according to errors
	 * @param trainingList Instance list to be trained on
	 * @return Classifier object containing learned weights
	 */
	public Winnow train (InstanceList trainingList)
	{
		FeatureSelection selectedFeatures = trainingList.getFeatureSelection();
		if (selectedFeatures != null)
			// xxx Attend to FeatureSelection!!!
			throw new UnsupportedOperationException ("FeatureSelection not yet implemented.");
		// if "train" is run more than once, 
		// we will be reinitializing the weights
		// TODO: provide method to save weights
		trainingList.getDataAlphabet().stopGrowth();
		trainingList.getTargetAlphabet().stopGrowth();
		Pipe dataPipe = trainingList.getPipe ();
		Alphabet dict = (Alphabet) trainingList.getDataAlphabet ();
		int numLabels = trainingList.getTargetAlphabet().size();
		int numFeats = dict.size(); 
		// Decision threshold scales with the number of features.
		this.theta =  numFeats * this.nfactor;
		this.weights = new double [numLabels][numFeats];
		// init weights to 1
		for(int i=0; i<numLabels; i++)
			for(int j=0; j<numFeats; j++)
				this.weights[i][j] = 1.0;
		//System.out.println("Init weights to 1.  Theta= "+theta);
		// loop through all instances (single pass, online updates)
		for (int ii = 0; ii < trainingList.size(); ii++){
			Instance inst = (Instance) trainingList.get(ii);
			Labeling labeling = inst.getLabeling ();
			FeatureVector fv = (FeatureVector) inst.getData ();
			double[] results = new double [numLabels]; 
			int fvisize = fv.numLocations();
			int correctIndex = labeling.getBestIndex();
			
			for(int rpos=0; rpos < numLabels; rpos++)
		    results[rpos]=0;
			// sum up xi*wi for each class
			// (only present features contribute; feature values are treated as 1)
			for(int fvi=0; fvi < fvisize; fvi++){
				int fi = fv.indexAtLocation(fvi);
				//System.out.println("feature index "+fi);
				for(int lpos=0; lpos < numLabels; lpos++)
			    results[lpos] += this.weights[lpos][fi];
			}
			//System.out.println("In instance " + ii);
			// make guess for each label using threshold
			// update weights according to alpha and beta 
			// upon incorrect guess
			for(int ri=0; ri < numLabels; ri++){
				if(results[ri] > this.theta){ // guess 1
					if(correctIndex != ri) // correct is 0
				    demote(ri, fv); // false positive: shrink this label's weights
				}
				else{ // guess 0
					if(correctIndex == ri) // correct is 1
						promote(ri, fv);   // false negative: grow this label's weights
				}
			}
//			System.out.println("Results guessed:")
//		for(int x=0; x<numLabels; x++)
//		    System.out.println(results[x]);
//			System.out.println("Correct label: "+correctIndex );
//			System.out.println("Weights are");
//			for(int h=0; h<numLabels; h++){
//				for(int g=0; g<numFeats; g++)
//			    System.out.println(weights[h][g]);
//				System.out.println("");
//			}
		}
		classifier = new Winnow (dataPipe, weights, theta, numLabels, numFeats);
		return classifier;
	}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:75,代码来源:WinnowTrainer.java


注:本文中的cc.mallet.types.InstanceList.getDataAlphabet方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。