

Java HashIndex Class Code Examples

This article collects and summarizes typical usage examples of the Java class edu.stanford.nlp.util.HashIndex. If you are wondering what the HashIndex class is for, how to use it, or where to find concrete HashIndex examples, the curated code samples below should help.


The HashIndex class belongs to the edu.stanford.nlp.util package. A total of 15 code examples of the HashIndex class are shown below, sorted by popularity by default.
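
Before diving into the listing, here is a minimal, self-contained sketch of the core HashIndex operations that recur throughout the examples below (add, indexOf, indexOf with on-demand insertion, get, size). The import and the method calls are taken from the examples themselves; the wrapper class and the sample strings are purely illustrative.

import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;

public class HashIndexDemo {
  public static void main(String[] args) {
    // A HashIndex maps objects to dense integer ids and back.
    Index<String> tagIndex = new HashIndex<String>();

    // add() appends an element only if it is not already present.
    tagIndex.add("NN");
    tagIndex.add("VB");
    tagIndex.add("NN"); // duplicate, the index still holds 2 elements

    // indexOf(x) returns the id of x, or -1 if it is absent;
    // indexOf(x, true) inserts x first when necessary.
    int nn = tagIndex.indexOf("NN");        // 0
    int jj = tagIndex.indexOf("JJ", true);  // 2, inserted on demand

    // get(i) is the reverse lookup from id to element.
    System.out.println(tagIndex.get(nn) + " " + tagIndex.get(jj)); // NN JJ
    System.out.println("size = " + tagIndex.size());               // size = 3
  }
}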

Example 1: recalculateTemporaryBetas

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Creates temporary beta data structures and fills them in by
 * iterating over the trees.
 */
public void recalculateTemporaryBetas(boolean splitStates, Map<String, double[]> totalStateMass,
                                      TwoDimensionalMap<String, String, double[][]> tempUnaryBetas,
                                      ThreeDimensionalMap<String, String, String, double[][][]> tempBinaryBetas) {
  tempWordIndex = new HashIndex<String>();
  tempTagIndex = new HashIndex<String>();
  tempLex = op.tlpParams.lex(op, tempWordIndex, tempTagIndex);
  tempLex.initializeTraining(trainSize);

  for (Tree tree : trees) {
    double weight = treeWeights.getCount(tree);
    if (DEBUG()) {
      System.out.println("Incrementing trees read: " + weight);
    }
    tempLex.incrementTreesRead(weight);
    recalculateTemporaryBetas(tree, splitStates, totalStateMass, tempUnaryBetas, tempBinaryBetas);
  }

  tempLex.finishTraining();
}
 
Developer ID: benblamey, Project: stanford-nlp, Lines of code: 24, Source file: SplittingGrammarExtractor.java

Example 2: tagProject

import edu.stanford.nlp.util.HashIndex; // import the required package/class
private short tagProject(short tag) {
  if (smoothTPIndex == null) {
    smoothTPIndex = new HashIndex<String>(tagIndex);
  }
  if (tag < 0) {
    return tag;
  } else {
    String tagStr = smoothTPIndex.get(tag);
    String binStr = TP_PREFIX + smoothTP.project(tagStr);
    return (short) smoothTPIndex.indexOf(binStr, true);
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 13, Source file: MLEDependencyGrammar.java

Example 3: initTagBins

import edu.stanford.nlp.util.HashIndex; // import the required package/class
protected void initTagBins() {
  Index<String> tagBinIndex = new HashIndex<String>();
  if (DEBUG) {
    System.err.println();
    System.err.println("There are " + tagIndex.size() + " tags.");
  }
  tagBin = new int[tagIndex.size()];
  for (int t = 0; t < tagBin.length; t++) {
    String tagStr = tagIndex.get(t);
    String binStr;
    if (tagProjection == null) {
      binStr = tagStr;
    } else {
      binStr = tagProjection.project(tagStr);
    }
    tagBin[t] = tagBinIndex.indexOf(binStr, true);
    if (DEBUG) {
      System.err.println("initTagBins: Mapped " + tagStr + " (" + t +
                         ") to " + binStr + " (" + tagBin[t] + ")");
    }
  }
  numTagBins = tagBinIndex.size();
  if (DEBUG) {
    System.err.println("initTagBins: tags " + tagBin.length + " bins " +
                       numTagBins);
    System.err.println("tagBins: " + tagBinIndex);
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 29, Source file: AbstractDependencyGrammar.java
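
The two-index pattern in example 3 can also be exercised in isolation: one HashIndex holds the fine-grained tags, and a second HashIndex assigns ids to the coarser bins they are projected onto. Below is a minimal sketch of that pattern; the trivial "first two characters" projection merely stands in for a real TagProjection, and everything outside the HashIndex calls is illustrative.

import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;

public class TagBinDemo {
  public static void main(String[] args) {
    Index<String> tagIndex = new HashIndex<String>();
    tagIndex.add("NN");
    tagIndex.add("NNS");
    tagIndex.add("VB");
    tagIndex.add("VBD");

    Index<String> tagBinIndex = new HashIndex<String>();
    int[] tagBin = new int[tagIndex.size()];
    for (int t = 0; t < tagBin.length; t++) {
      String tagStr = tagIndex.get(t);
      // Toy projection: collapse tags that share their first two characters.
      String binStr = tagStr.substring(0, Math.min(2, tagStr.length()));
      tagBin[t] = tagBinIndex.indexOf(binStr, true); // insert the bin on demand
      System.out.println(tagStr + " (" + t + ") -> " + binStr + " (" + tagBin[t] + ")");
    }
    System.out.println("tags: " + tagBin.length + ", bins: " + tagBinIndex.size()); // tags: 4, bins: 2
  }
}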

Example 4: addGuess

import edu.stanford.nlp.util.HashIndex; // import the required package/class
protected void addGuess(L guess, L label, boolean addUnknownLabels)
{
  if (label == null) {
    noLabel++;
    return;
  }
  if (addUnknownLabels) {
    if (labelIndex == null) {
      labelIndex = new HashIndex<L>();
    }
    labelIndex.add(guess);
    labelIndex.add(label);
  }
  if (guess.equals(label)) {
    correctGuesses.incrementCount(label);
    tokensCorrect++;
  }

  if (!guess.equals(negLabel)) {
    foundGuessed.incrementCount(guess);
  }

  if (!label.equals(negLabel)) {
    foundCorrect.incrementCount(label);
  }
  tokensCount++;
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 28, Source file: MultiClassPrecisionRecallExtendedStats.java

Example 5: initialize

import edu.stanford.nlp.util.HashIndex; // import the required package/class
@Override
protected void initialize(int numDatums) {
  labelIndex = new HashIndex<L>();
  featureIndex = new HashIndex<F>();
  labels = new int[numDatums];
  data = new int[numDatums][];
  values = new double[numDatums][];
  sourcesAndIds = new ArrayList<Pair<String, String>>(numDatums);
  size = 0;
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 11, Source file: RVFDataset.java

Example 6: selectFeaturesFromSet

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Removes all features from the dataset that are not in featureSet.
 *
 * @param featureSet
 */
public void selectFeaturesFromSet(Set<F> featureSet) {
  HashIndex<F> newFeatureIndex = new HashIndex<F>();
  int[] featMap = new int[featureIndex.size()];
  Arrays.fill(featMap, -1);
  for (F feature : featureSet) {
    int oldID = featureIndex.indexOf(feature);
    if (oldID >= 0) { // it's a valid feature in the index
      int newID = newFeatureIndex.indexOf(feature, true);
      featMap[oldID] = newID;
    }
  }
  featureIndex = newFeatureIndex;
  for (int i = 0; i < size; i++) {
    List<Integer> featList = new ArrayList<Integer>(data[i].length);
    List<Double> valueList = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        featList.add(featMap[data[i][j]]);
        valueList.add(values[i][j]);
      }
    }
    data[i] = new int[featList.size()];
    values[i] = new double[valueList.size()];
    for (int j = 0; j < data[i].length; j++) {
      data[i][j] = featList.get(j);
      values[i][j] = valueList.get(j);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 35, Source file: RVFDataset.java

Example 7: applyFeatureCountThreshold

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Applies a feature count threshold to the RVFDataset. All features that
 * occur fewer than <i>k</i> times are expunged.
 */
public void applyFeatureCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> newFeatureIndex = new HashIndex<F>();

  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    F feat = featureIndex.get(i);
    if (counts[i] >= k) {
      int newIndex = newFeatureIndex.size();
      newFeatureIndex.add(feat);
      featMap[i] = newIndex;
    } else {
      featMap[i] = -1;
    }
    // featureIndex.remove(feat);
  }

  featureIndex = newFeatureIndex;
  // counts = null; // This is unnecessary; JVM can clean it up

  for (int i = 0; i < size; i++) {
    List<Integer> featList = new ArrayList<Integer>(data[i].length);
    List<Double> valueList = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        featList.add(featMap[data[i][j]]);
        valueList.add(values[i][j]);
      }
    }
    data[i] = new int[featList.size()];
    values[i] = new double[valueList.size()];
    for (int j = 0; j < data[i].length; j++) {
      data[i][j] = featList.get(j);
      values[i][j] = valueList.get(j);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 42, Source file: RVFDataset.java
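
Examples 6 through 10 all rely on the same idiom: build a fresh HashIndex for the features that survive, record an old-id-to-new-id mapping in an int array (with -1 marking pruned features), then rewrite each datum's feature ids through that map. Here is a stripped-down sketch of the idiom with one toy datum; only the HashIndex calls mirror the snippets above, the rest is illustrative scaffolding.

import edu.stanford.nlp.util.HashIndex;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FeatureRemapDemo {
  public static void main(String[] args) {
    HashIndex<String> featureIndex = new HashIndex<String>();
    featureIndex.add("f0");
    featureIndex.add("f1");
    featureIndex.add("f2");

    // One toy datum that refers to features by their old ids.
    int[] datum = {0, 1, 2};

    // Keep only a chosen subset of features (here: f0 and f2).
    HashIndex<String> newFeatureIndex = new HashIndex<String>();
    int[] featMap = new int[featureIndex.size()];
    Arrays.fill(featMap, -1);
    for (String feat : Arrays.asList("f0", "f2")) {
      int oldID = featureIndex.indexOf(feat);
      if (oldID >= 0) {
        featMap[oldID] = newFeatureIndex.indexOf(feat, true);
      }
    }

    // Rewrite the datum: drop pruned features and renumber the survivors.
    List<Integer> kept = new ArrayList<Integer>();
    for (int oldID : datum) {
      if (featMap[oldID] >= 0) {
        kept.add(featMap[oldID]);
      }
    }
    int[] newDatum = new int[kept.size()];
    for (int j = 0; j < newDatum.length; j++) {
      newDatum[j] = kept.get(j);
    }
    System.out.println(Arrays.toString(newDatum)); // [0, 1]
  }
}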

Example 8: applyFeatureMaxCountThreshold

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Applies a feature max count threshold to the RVFDataset. All features that
 * occur greater than <i>k</i> times are expunged.
 */
public void applyFeatureMaxCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> newFeatureIndex = new HashIndex<F>();

  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    F feat = featureIndex.get(i);
    if (counts[i] <= k) {
      int newIndex = newFeatureIndex.size();
      newFeatureIndex.add(feat);
      featMap[i] = newIndex;
    } else {
      featMap[i] = -1;
    }
    // featureIndex.remove(feat);
  }

  featureIndex = newFeatureIndex;
  // counts = null; // This is unnecessary; JVM can clean it up

  for (int i = 0; i < size; i++) {
    List<Integer> featList = new ArrayList<Integer>(data[i].length);
    List<Double> valueList = new ArrayList<Double>(values[i].length);
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        featList.add(featMap[data[i][j]]);
        valueList.add(values[i][j]);
      }
    }
    data[i] = new int[featList.size()];
    values[i] = new double[valueList.size()];
    for (int j = 0; j < data[i].length; j++) {
      data[i][j] = featList.get(j);
      values[i][j] = valueList.get(j);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 42, Source file: RVFDataset.java

Example 9: applyFeatureCountThreshold

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Applies a feature count threshold to the Dataset.  All features that
 * occur fewer than <i>k</i> times are expunged.
 */
public void applyFeatureCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  Index<F> newFeatureIndex = new HashIndex<F>();

  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    F feat = featureIndex.get(i);
    if (counts[i] >= k) {
      int newIndex = newFeatureIndex.size();
      newFeatureIndex.add(feat);
      featMap[i] = newIndex;
    } else {
      featMap[i] = -1;
    }
    // featureIndex.remove(feat);
  }

  featureIndex = newFeatureIndex;
  // counts = null; // This is unnecessary; JVM can clean it up

  for (int i = 0; i < size; i++) {
    List<Integer> featList = new ArrayList<Integer>(data[i].length);
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        featList.add(featMap[data[i][j]]);
      }
    }
    data[i] = new int[featList.size()];
    for (int j = 0; j < data[i].length; j++) {
      data[i][j] = featList.get(j);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 38, Source file: GeneralDataset.java

Example 10: applyFeatureMaxCountThreshold

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Applies a max feature count threshold to the Dataset.  All features that
 * occur greater than <i>k</i> times are expunged.
 */
public void applyFeatureMaxCountThreshold(int k) {
  float[] counts = getFeatureCounts();
  HashIndex<F> newFeatureIndex = new HashIndex<F>();

  int[] featMap = new int[featureIndex.size()];
  for (int i = 0; i < featMap.length; i++) {
    F feat = featureIndex.get(i);
    if (counts[i] <= k) {
      int newIndex = newFeatureIndex.size();
      newFeatureIndex.add(feat);
      featMap[i] = newIndex;
    } else {
      featMap[i] = -1;
    }
    // featureIndex.remove(feat);
  }

  featureIndex = newFeatureIndex;
  // counts = null; // This is unnecessary; JVM can clean it up

  for (int i = 0; i < size; i++) {
    List<Integer> featList = new ArrayList<Integer>(data[i].length);
    for (int j = 0; j < data[i].length; j++) {
      if (featMap[data[i][j]] >= 0) {
        featList.add(featMap[data[i][j]]);
      }
    }
    data[i] = new int[featList.size()];
    for (int j = 0; j < data[i].length; j++) {
      data[i][j] = featList.get(j);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 38, Source file: GeneralDataset.java

Example 11: initialize

import edu.stanford.nlp.util.HashIndex; // import the required package/class
@Override
protected final void initialize(int numDatums) {
  labelIndex = new HashIndex<L>();
  featureIndex = new HashIndex<F>();
  labels = new int[numDatums];
  data = new int[numDatums][];
  size = 0;
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 9, Source file: Dataset.java

Example 12: selectFeatures

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/**
 * Generic method to select features based on the feature scores vector provided as an argument.
 * @param numFeatures number of features to be selected.
 * @param scores a vector of size total number of features in the data.
 */
public void selectFeatures(int numFeatures, double[] scores) {

  List<ScoredObject<F>> scoredFeatures = new ArrayList<ScoredObject<F>>();

  for (int i = 0; i < scores.length; i++) {
    scoredFeatures.add(new ScoredObject<F>(featureIndex.get(i), scores[i]));
  }

  Collections.sort(scoredFeatures, ScoredComparator.DESCENDING_COMPARATOR);
  Index<F> newFeatureIndex = new HashIndex<F>();
  for (int i = 0; i < scoredFeatures.size() && i < numFeatures; i++) {
    newFeatureIndex.add(scoredFeatures.get(i).object());
    //System.err.println(scoredFeatures.get(i));
  }

  for (int i = 0; i < size; i++) {
    int[] newData = new int[data[i].length];
    int curIndex = 0;
    for (int j = 0; j < data[i].length; j++) {
      int index;
      if ((index = newFeatureIndex.indexOf(featureIndex.get(data[i][j]))) != -1) {
        newData[curIndex++] = index;
      }
    }
    int[] newDataTrimmed = new int[curIndex];
    System.arraycopy(newData, 0, newDataTrimmed, 0, curIndex);
    data[i] = newDataTrimmed;
  }
  featureIndex = newFeatureIndex;
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 36, Source file: Dataset.java

Example 13: getFeaturesAboveThreshhold

import edu.stanford.nlp.util.HashIndex; // import the required package/class
public Index<String> getFeaturesAboveThreshhold(Dataset<String, String> dataset, double thresh) {
  if (!(classifier instanceof LinearClassifier)) {
    throw new RuntimeException("Attempting to remove features based on weight from a non-linear classifier");
  }
  Index<String> featureIndex = dataset.featureIndex;
  Index<String> labelIndex = dataset.labelIndex;

  Index<String> features = new HashIndex<String>();
  Iterator<String> featureIt = featureIndex.iterator();
  LinearClassifier<String, String> lc = (LinearClassifier<String, String>)classifier;
  LOOP:
  while (featureIt.hasNext()) {
    String f = featureIt.next();
    Iterator<String> labelIt = labelIndex.iterator();
    double smallest = Double.POSITIVE_INFINITY;
    double biggest = Double.NEGATIVE_INFINITY;
    while (labelIt.hasNext()) {
      String l = labelIt.next();
      double weight = lc.weight(f, l);
      if (weight < smallest) {
        smallest = weight;
      }
      if (weight > biggest) {
        biggest = weight;
      }
      if (biggest - smallest > thresh) {
        features.add(f);
        continue LOOP;
      }
    }
  }
  return features;
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 34, Source file: CMMClassifier.java

Example 14: makeAnswerArraysAndTagIndex

import edu.stanford.nlp.util.HashIndex; // import the required package/class
/** This routine builds the <code>answerArrays</code> which give the
 *  empirically legal label sequences (of length (order) at most
 *  <code>flags.maxLeft</code>) and the <code>classIndex</code>,
 *  which indexes known answer classes.
 *
 * @param docs The training data: A List of List of CoreLabel
 */
private void makeAnswerArraysAndTagIndex(Collection<List<IN>> docs) {
  if (answerArrays == null) {
    answerArrays = Generics.newHashSet();
  }
  if (classIndex == null) {
    classIndex = new HashIndex<String>();
  }

  for (List<IN> doc : docs) {
    if (flags.useReverse) {
      Collections.reverse(doc);
    }

    int leng = doc.size();
    for (int start = 0; start < leng; start++) {
      for (int diff = 1; diff <= flags.maxLeft && start + diff <= leng; diff++) {
        String[] seq = new String[diff];
        for (int i = start; i < start + diff; i++) {
          seq[i - start] = doc.get(i).get(CoreAnnotations.AnswerAnnotation.class);
        }
        answerArrays.add(Arrays.asList(seq));
      }
    }
    for (int i = 0; i < leng; i++) {
      CoreLabel wordInfo = doc.get(i);
      classIndex.add(wordInfo.get(CoreAnnotations.AnswerAnnotation.class));
    }

    if (flags.useReverse) {
      Collections.reverse(doc);
    }
  }
}
 
Developer ID: paulirwin, Project: Stanford.NER.Net, Lines of code: 41, Source file: CMMClassifier.java

Example 15: initialBetasAndLexicon

import edu.stanford.nlp.util.HashIndex; // import the required package/class
private void initialBetasAndLexicon() {
  wordIndex = new HashIndex<String>();
  tagIndex = new HashIndex<String>();
  lex = op.tlpParams.lex(op, wordIndex, tagIndex);
  lex.initializeTraining(trainSize);

  for (Tree tree : trees) {
    double weight = treeWeights.getCount(tree);
    lex.incrementTreesRead(weight);
    initialBetasAndLexicon(tree, 0, weight);
  }

  lex.finishTraining();
}
 
Developer ID: benblamey, Project: stanford-nlp, Lines of code: 15, Source file: SplittingGrammarExtractor.java


Note: The edu.stanford.nlp.util.HashIndex class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many programmers, and copyright of the source code remains with the original authors. Please consult the corresponding projects' licenses before redistributing or using the code; do not reproduce this article without permission.