当前位置: 首页>>代码示例>>Java>>正文


Java FeatureVector类代码示例

本文整理汇总了Java中cc.mallet.types.FeatureVector的典型用法代码示例。如果您正苦于以下问题:Java FeatureVector类的具体用法是什么?Java FeatureVector怎么用?有哪些Java FeatureVector的使用示例?那么,这里精选的类代码示例或许可以为您提供帮助。


FeatureVector类属于cc.mallet.types包,在下文中一共展示了FeatureVector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: pipe

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Rescales selected features of the instance's FeatureVector in place.
 *
 * For every stored location whose feature index has {@code normalize[index]}
 * set, the value is replaced by
 * {@code (value - means[index]) / Math.sqrt(variances[index])}.
 *
 * @param carrier instance whose data must be a FeatureVector
 * @return the same instance; its FeatureVector has been updated in place
 * @throws IllegalArgumentException if the instance data is not a FeatureVector
 *         (including null data)
 * @throws GateRuntimeException if the length of the means or variances array
 *         differs from the size of the data alphabet
 */
public Instance pipe(Instance carrier) {
  Object data = carrier.getData();
  if (!(data instanceof FeatureVector)) {
    // Guard the class lookup so null data is reported with the intended
    // exception instead of failing while the message is being built.
    Object actualClass = (data == null) ? null : data.getClass();
    throw new IllegalArgumentException("Data must be of type FeatureVector not " + actualClass + " we got " + data);
  }

  int alphabetSize = this.getDataAlphabet().size();
  if (this.means.length != alphabetSize || this.variances.length != alphabetSize) {
    throw new GateRuntimeException("Size mismatch, alphabet="+alphabetSize+", stats="+means.length);
  }

  FeatureVector fv = (FeatureVector) data;
  int[] indices = fv.getIndices();
  double[] values = fv.getValues();
  for (int location = 0; location < indices.length; location++) {
    int index = indices[location];
    if (!normalize[index]) {
      continue;
    }
    // NOTE(review): a zero variance yields Infinity/NaN here; presumably
    // such features are excluded via normalize[] upstream -- confirm.
    double scaled = (values[location] - means[index]) / Math.sqrt(variances[index]);
    // The location is already known, so write to it directly rather than
    // having setValue(index, ...) search for the index again.
    fv.setValueAtLocation(location, scaled);
  }
  return carrier;
}
 
开发者ID:GateNLP,项目名称:gateplugin-LearningFramework,代码行数:26,代码来源:PipeScaleMeanVarAll.java

示例2: incorporateOneInstance

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Feeds one instance into the estimators {@code me} and {@code pe}.
 *
 * Instances without a labeling, or whose feature vector has a one-norm
 * that is not positive, are skipped. When {@code docLengthNormalization}
 * is positive the instance weight is rescaled by
 * {@code docLengthNormalization / oneNorm}.
 *
 * @param instance       instance whose data is cast to a FeatureVector
 * @param instanceWeight weight applied to every increment for this instance
 */
private void incorporateOneInstance (Instance instance, double instanceWeight) 
{
  Labeling target = instance.getLabeling ();
  if (target == null) {
    // Unlabeled instance: nothing to learn from.
    return;
  }

  FeatureVector features = (FeatureVector) instance.getData ();
  double featureMass = features.oneNorm();
  if (featureMass <= 0) {
    // No features present.
    return;
  }

  // With normalization on, the document is treated as if its counts
  // summed to docLengthNormalization (e.g. 20 => "20 words").
  double weight = (docLengthNormalization > 0)
      ? instanceWeight * (docLengthNormalization / featureMass)
      : instanceWeight;
  assert (weight > 0 && !Double.isInfinite(weight));

  for (int pos = 0; pos < target.numLocations(); pos++) {
    int labelIndex = target.indexAtLocation (pos);
    double labelWeight = target.valueAtLocation (pos);
    if (labelWeight != 0) {
      double increment = labelWeight * weight;
      me[labelIndex].increment (features, increment);
      // This relies on the label weights summing to 1 over all labels.
      pe.increment (labelIndex, increment);
    }
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:NaiveBayesTrainer.java

示例3: preProcess

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Counts occurrences of constrained features across the data and records
 * which instances contain at least one of them.
 *
 * Each instance's data is cast to a FeatureVectorSequence. For every
 * feature location whose index is a key of {@code constraints}, that
 * constraint's {@code count} is incremented by one.
 *
 * @param data instances to scan
 * @return a BitSet with bit i set when the i-th instance (in iteration
 *         order) contained a constrained feature
 */
public BitSet preProcess(InstanceList data) {
  BitSet touched = new BitSet(data.size());
  int instanceIndex = 0;
  for (Instance instance : data) {
    FeatureVectorSequence sequence = (FeatureVectorSequence)instance.getData();
    for (int position = 0; position < sequence.size(); position++) {
      FeatureVector features = sequence.get(position);
      for (int loc = 0; loc < features.numLocations(); loc++) {
        int featureIndex = features.indexAtLocation(loc);
        if (!constraints.containsKey(featureIndex)) {
          continue;
        }
        constraints.get(featureIndex).count += 1;
        touched.set(instanceIndex);
      }
    }
    instanceIndex++;
  }
  return touched;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:23,代码来源:OneLabelL2PRConstraints.java

示例4: getScore

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Sums the parameters of the cached constraints for the label mapped from
 * the destination state.
 *
 * The parameter for a cached feature is read at
 * {@code constraintIndex + constraints.size() * labelIndex}. When
 * {@code normalized} is set, each parameter is divided by the
 * constraint's {@code count}.
 *
 * @param input         not read by this method
 * @param inputPosition not read by this method
 * @param srcIndex      not read by this method
 * @param destIndex     destination state index, mapped to a label index
 * @param parameters    flat parameter array
 * @return the accumulated score
 */
public double getScore(FeatureVector input, int inputPosition,
    int srcIndex, int destIndex, double[] parameters) {
  final int label = map.getLabelIndex(destIndex);
  double total = 0;
  // TODO binary features
  for (int pos = 0; pos < cache.size(); pos++) {
    int featureIndex = cache.getQuick(pos);
    int constraintIndex = constraintIndices.get(featureIndex);
    double weight = parameters[constraintIndex + constraints.size() * label];
    total += normalized
        ? weight / constraints.get(featureIndex).count
        : weight;
  }
  return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:OneLabelL2PRConstraints.java

示例5: getCompositeConstraintFeatureValue

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Sums the values of all cached constraints for the label pair mapped
 * from the two state indices.
 *
 * Returns 0 at position 0 and when the source state maps to
 * {@code StateLabelMap.START_LABEL}, so that only transitions into states
 * at position >= 1 are considered (this avoids complications with the
 * start state).
 *
 * @param fv  not read by this method
 * @param ip  input position
 * @param si1 source state index
 * @param si2 destination state index
 * @return the summed constraint value, or 0 for the skipped cases
 */
public double getCompositeConstraintFeatureValue(FeatureVector fv, int ip, int si1, int si2) {
  if (ip == 0) {
    return 0;
  }

  final int fromLabel = map.getLabelIndex(si1);
  if (fromLabel == StateLabelMap.START_LABEL) {
    return 0;
  }

  final int toLabel = map.getLabelIndex(si2);
  double total = 0;
  int pos = 0;
  while (pos < cache.size()) {
    total += constraintsList.get(cache.getQuick(pos)).getValue(fromLabel, toLabel);
    pos++;
  }
  return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:21,代码来源:TwoLabelGEConstraints.java

示例6: serializeObject

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Writes a single object, without metadata, as a type tag followed by its
 * payload.
 *
 * A FeatureVector is written as its indices array and then its values
 * array; a Label is written as its string form; anything else is written
 * as the object itself.
 *
 * @param out stream to write to
 * @param obj object to serialize
 * @throws IOException if writing to the stream fails
 */
private void serializeObject (ObjectOutputStream out, Object obj)
throws IOException {
    if (obj instanceof FeatureVector) {
        FeatureVector vector = (FeatureVector) obj;
        out.writeChar (TYPE_FEATURE_VECTOR);
        out.writeObject (vector.getIndices ());
        out.writeObject (vector.getValues ());
        return;
    }

    if (obj instanceof Label) {
        Label label = (Label) obj;
        out.writeChar (TYPE_LABEL);
        out.writeObject (label.toString ());
        return;
    }

    // Fallback: tag as a generic object and serialize it as-is.
    out.writeChar (TYPE_OBJECT);
    out.writeObject (obj);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:22,代码来源:PagedInstanceList.java

示例7: testOne

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Builds a token with two properties and three feature values (one of
 * them zero), converts it to a FeatureVector, and checks that the vector
 * has two locations and that "length" has the value 3.
 */
public void testOne ()
{
	Token token = new Token ("foo");

	// Properties
	token.setProperty ("color", "red");
	token.setProperty ("font", "TimesRoman");

	// Feature values; the last one is zero
	token.setFeatureValue ("length", 3);
	token.setFeatureValue ("containsVowel", 1);
	token.setFeatureValue ("in /usr/dict/words", 0);

	Alphabet alphabet = new Alphabet();
	FeatureVector vector = token.toFeatureVector (alphabet, false);

	int locationCount = vector.numLocations();
	assertTrue (locationCount == 2);

	int lengthIndex = alphabet.lookupIndex("length");
	assertTrue (vector.value (lengthIndex) == 3);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:17,代码来源:TestToken.java

示例8: setUp

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Creates the fixture: a fresh alphabet, a feature sequence holding the
 * letters of "andrewmccallum" in order (one single-letter feature per
 * character, repeats included), and a feature vector built from that
 * sequence.
 */
protected void setUp ()
{
	dict = new Alphabet ();
	fs = new FeatureSequence (dict, 2);
	// Same letters, in the same order, as adding each one by hand.
	for (char letter : "andrewmccallum".toCharArray ()) {
		fs.add (dict.lookupIndex (String.valueOf (letter)));
	}
	fv = new FeatureVector (fs);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:TestFeatureVector.java

示例9: testAddWithPrefix

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Adds a two-feature vector to an AugmentableFeatureVector with the
 * prefix "O:" and checks that the source alphabet is unchanged (4
 * entries), the target alphabet has 2 entries, and the target prints as
 * the prefixed feature names.
 */
public void testAddWithPrefix ()
{
  Alphabet source = new Alphabet ();
  for (String name : new String[] { "ZERO", "ONE", "TWO", "THREE" }) {
    source.lookupIndex (name);
  }

  int[] selected = new int[] { 1,3 };
  FeatureVector original = new FeatureVector (source, selected);

  Alphabet target = new Alphabet ();
  AugmentableFeatureVector augmented = new AugmentableFeatureVector (target, true);
  augmented.add (original, "O:");

  assertEquals (4, source.size());
  assertEquals (2, augmented.getAlphabet ().size());
  assertEquals ("O:ONE\nO:THREE\n", augmented.toString ());
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:18,代码来源:TestAugmentableFeatureVector.java

示例10: pipe

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
   * Replaces the instance's data, a <tt>CharSequence</tt> of
   * whitespace-separated sparse entries, with a <tt>FeatureVector</tt>.
   *
   * An entry of the form <tt>name:value</tt> is split at its last colon.
   * An entry with no colon, or whose last colon is its first or last
   * character, is used whole as the feature name with value 1.0.
   *
   * @param carrier instance whose data is cast to a CharSequence
   * @return the same instance, now carrying the FeatureVector
   */
  public Instance pipe(Instance carrier) {

      String text = ((CharSequence) carrier.getData()).toString().trim();
      String[] entries = text.split("\\s+");
      int[] keys = new int[entries.length];
      double[] values = new double[entries.length];

      int slot = 0;
      for (String entry : entries) {
          int colon = entry.lastIndexOf(":");
          boolean hasExplicitValue = colon > 0 && colon != entry.length() - 1;
          if (hasExplicitValue) {
              keys[slot] = dataAlphabet.lookupIndex(entry.substring(0, colon), true);
              values[slot] = Double.parseDouble(entry.substring(colon + 1));
          }
          else {
              keys[slot] = dataAlphabet.lookupIndex(entry, true);
              values[slot] = 1.0;
          }
          slot++;
      }

      // Keys and values are handed over in input order; an earlier version
      // sorted the indices but not the values, and that code was removed.
      carrier.setData( new FeatureVector(dataAlphabet, keys, values) );
      return carrier;
  }
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:30,代码来源:Csv2FeatureVector.java

示例11: next

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Produces the next random instance.
 *
 * Instance indices count down within a class; when they run out, the
 * class index is decremented and the instance index restarts at that
 * class's last instance. Each instance carries a feature vector drawn
 * from the current class centroid, the class name as target, and a
 * "random:" URI as name.
 *
 * @return a new instance for the current class
 * @throws IllegalStateException if no instances remain, or if the
 *         instance URI cannot be built (the underlying exception is
 *         attached as the cause)
 */
public Instance next ()
{
	if (currentInstanceIndex < 0) {
		if (currentClassIndex <= 0)
			throw new IllegalStateException ("No next FeatureVector.");
		currentClassIndex--;
		currentInstanceIndex = numInstancesPerClass[currentClassIndex] - 1;
	}
	URI uri;
	try {
		uri = new URI ("random:" + classNames[currentClassIndex] + "/" + currentInstanceIndex);
	}
	catch (Exception e) {
		// Keep the exception type callers already see, but chain the cause
		// instead of printing it and throwing an empty exception.
		throw new IllegalStateException ("Could not build URI for random instance", e);
	}
	// The size is the lambda truncated to int, not a Poisson draw; an
	// earlier note questioned whether sampling produced numbers too small.
	int randomSize = (int)featureVectorSizePoissonLambda;
	FeatureVector fv = classCentroid[currentClassIndex].randomFeatureVector (r, randomSize);
	currentInstanceIndex--;
	return new Instance (fv, classNames[currentClassIndex], uri, null);
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:19,代码来源:RandomFeatureVectorIterator.java

示例12: getUnnormalizedClassificationScores

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Fills {@code scores} with one unnormalized score per label.
 *
 * For each label the score is the parameter stored at the default
 * feature index of that label's row plus the row's dot product with the
 * instance's feature vector, restricted by the per-class feature
 * selection when present and the shared one otherwise.
 *
 * @param instance instance whose data is cast to a FeatureVector
 * @param scores   output array; asserted to have one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
    // The row length comes from defaultFeatureIndex, not from the alphabet
    // size: the alphabet-based length caused out-of-bounds errors if the
    // pipe had grown since training.
    final int rowLength = this.defaultFeatureIndex + 1;
    final int labelCount = getLabelAlphabet().size();
    assert (scores.length == labelCount);

    FeatureVector features = (FeatureVector) instance.getData ();
    // The vector's alphabet must be the very one our data pipe uses.
    assert (features.getAlphabet ()
            == this.instancePipe.getDataAlphabet ());

    for (int label = 0; label < labelCount; label++) {
        double defaultWeight = parameters[label*rowLength + defaultFeatureIndex];
        double dot = MatrixOps.rowDotProduct (parameters, rowLength,
                label, features,
                defaultFeatureIndex,
                (perClassFeatureSelection == null
                        ? featureSelection
                        : perClassFeatureSelection[label]));
        scores[label] = defaultWeight + dot;
    }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:27,代码来源:MCMaxEnt.java

示例13: preProcess

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Rebuilds the caches of constrained features for one input vector.
 *
 * Both caches are reset, then every feature index in {@code input} that
 * is a key of {@code constraints} is appended to {@code indexCache}
 * (and its value to {@code valueCache} when {@code useValues} is set).
 * If {@code numFeatures} is itself a constraint key, it is appended last
 * with value 1 as the default feature used for label regularization.
 *
 * @param input feature vector to scan
 */
public void preProcess(FeatureVector input) {
  indexCache.resetQuick();
  if (useValues) {
    valueCache.resetQuick();
  }

  // Cache only the input features that have a constraint.
  for (int loc = 0; loc < input.numLocations(); loc++) {
    int featureIndex = input.indexAtLocation(loc);
    if (!constraints.containsKey(featureIndex)) {
      continue;
    }
    indexCache.add(featureIndex);
    if (useValues) {
      valueCache.add(input.valueAtLocation(loc));
    }
  }

  // Default feature, for label regularization.
  if (!constraints.containsKey(numFeatures)) {
    return;
  }
  indexCache.add(numFeatures);
  if (useValues) {
    valueCache.add(1);
  }
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:20,代码来源:MaxEntRangeL2FLGEConstraints.java

示例14: getScore

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Sums the contributions of the cached constrained features for a label.
 *
 * Each contribution is the parameter at
 * {@code constraintIndex + label * constraints.size()}, multiplied by the
 * cached feature value when {@code useValues} is set, and divided by the
 * constraint's {@code count} when {@code normalize} is set (by 1
 * otherwise).
 *
 * @param input      not read by this method; the caches are used instead
 * @param label      label index
 * @param parameters flat parameter array
 * @return the accumulated score
 */
public double getScore(FeatureVector input, int label, double[] parameters) {
  double total = 0;
  for (int pos = 0; pos < indexCache.size(); pos++) {
    int featureIndex = indexCache.getQuick(pos);
    int constraintIndex = constraintIndices.get(featureIndex);
    double weight = parameters[constraintIndex + label * constraints.size()];

    double norm = normalize ? constraints.get(featureIndex).count : 1;

    total += useValues
        ? weight * valueCache.getQuick(pos) / norm
        : weight / norm;
  }
  return total;
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:24,代码来源:MaxEntL2FLPRConstraints.java

示例15: getUnnormalizedClassificationScores

import cc.mallet.types.FeatureVector; //导入依赖的package包/类
/**
 * Computes an unnormalized score for every label and stores it in
 * {@code scores}.
 *
 * A label's score is its default-feature parameter plus the dot product
 * of its parameter row with the instance's feature vector, using the
 * per-class feature selection if one exists and the shared feature
 * selection otherwise.
 *
 * @param instance instance whose data is cast to a FeatureVector
 * @param scores   output array; asserted to have one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
	// Row length is taken from defaultFeatureIndex rather than the
	// alphabet size, which caused out-of-bounds errors if the pipe had
	// grown since training.
	final int rowLength = this.defaultFeatureIndex + 1;
	final int labelCount = getLabelAlphabet().size();
	assert (scores.length == labelCount);

	FeatureVector features = (FeatureVector) instance.getData ();
	// The feature dictionary must be the same object our data pipe holds.
	assert (features.getAlphabet ()
			== this.instancePipe.getDataAlphabet ());

	int label = 0;
	while (label < labelCount) {
		double defaultWeight = parameters[label*rowLength + defaultFeatureIndex];
		double dot = MatrixOps.rowDotProduct (parameters, rowLength,
				label, features,
				defaultFeatureIndex,
				(perClassFeatureSelection == null
						? featureSelection
						: perClassFeatureSelection[label]));
		scores[label] = defaultWeight + dot;
		label++;
	}
}
 
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:27,代码来源:MaxEnt.java


注:本文中的cc.mallet.types.FeatureVector类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。