当前位置: 首页>>代码示例>>Java>>正文


Java ClassicCounter类代码示例

本文整理汇总了Java中edu.stanford.nlp.stats.ClassicCounter的典型用法代码示例。如果您正苦于以下问题:Java ClassicCounter类的具体用法?Java ClassicCounter怎么用?Java ClassicCounter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


ClassicCounter类属于edu.stanford.nlp.stats包,在下文中一共展示了ClassicCounter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getIDFMapForDocument

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Build a document-frequency counter over the nouns of a single document.
 *
 * @param document Raw document text.
 * @return Counter of noun occurrences (words whose POS tag starts with "n").
 */
private static Counter<String> getIDFMapForDocument(String document) {
  // Strip Gigaword heading patterns up front; they only slow tokenization
  // and contribute nothing useful.
  String cleaned = headingSeparator.matcher(document).replaceAll("");

  DocumentPreprocessor splitter = new DocumentPreprocessor(new StringReader(cleaned));
  splitter.setTokenizerFactory(tokenizerFactory);

  Counter<String> nounCounts = new ClassicCounter<String>();
  for (List<HasWord> sentence : splitter) {
    // Overly long sentences are skipped entirely; tagging them is expensive.
    if (sentence.size() <= MAX_SENTENCE_LENGTH) {
      for (TaggedWord tagged : tagger.tagSentence(sentence)) {
        // Count only nouns (tags beginning with "n").
        if (tagged.tag().startsWith("n")) {
          nounCounts.incrementCount(tagged.word());
        }
      }
    }
  }

  return nounCounts;
}
 
开发者ID:asmehra95,项目名称:wiseowl,代码行数:30,代码来源:DocumentFrequencyCounter.java

示例2: existsTokenMatch

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Report whether any token-, lemma-, or stem-level match exists between the
 * example and any of the Freebase descriptions.
 */
public boolean existsTokenMatch(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs) {
  // Stem every example token once up front.
  List<String> exampleStems = new ArrayList<String>();
  for (String token : exampleTokens) {
    exampleStems.add(stemmer.stem(token));
  }

  Counter<String> tokenMatches = new ClassicCounter<String>();
  Counter<String> stemMatches = new ClassicCounter<String>();
  for (String description : fbDescs) {
    List<String> descTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(description);
    List<String> descStems = new ArrayList<>();
    for (String descToken : descTokens) {
      descStems.add(stemmer.stem(descToken));
    }

    Counters.maxInPlace(tokenMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, descTokens, true));
    Counters.maxInPlace(tokenMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, descTokens, true));
    Counters.maxInPlace(stemMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, descStems, false));
    // Any non-empty match feature set means a match exists; stop early.
    if (tokenMatches.size() > 0 || stemMatches.size() > 0) {
      return true;
    }
  }
  return false;
}
 
开发者ID:cgraywang,项目名称:TextHIN,代码行数:23,代码来源:TextToTextMatcher.java

示例3: extractTokenMatchFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Add token- and stem-level match features between the example and the
 * Freebase descriptions to the given feature vector.
 */
private void extractTokenMatchFeatures(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs, FeatureVector vector) {
  // The "tokenMatch" feature domain may be disabled globally.
  if (!FeatureExtractor.containsDomain("tokenMatch")) return;

  // Stem every example token once up front.
  List<String> exampleStems = new ArrayList<>();
  for (String token : exampleTokens) {
    exampleStems.add(stemmer.stem(token));
  }

  Counter<String> tokenMatches = new ClassicCounter<>();
  Counter<String> stemMatches = new ClassicCounter<>();
  for (String description : fbDescs) {
    List<String> descTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(description);
    List<String> descStems = new ArrayList<>();
    for (String descToken : descTokens) {
      descStems.add(stemmer.stem(descToken));
    }

    // Keep the maximum feature value observed over all descriptions.
    Counters.maxInPlace(tokenMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, descTokens, true));
    Counters.maxInPlace(tokenMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, descTokens, true));
    Counters.maxInPlace(stemMatches, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, descStems, false));
  }
  if (opts.verbose >= 3) {
    LogInfo.logs("Binary formula desc: %s, token match: %s, stem match: %s", fbDescs, tokenMatches, stemMatches);
  }
  addFeaturesToVector(tokenMatches, "binary_token", vector);
  addFeaturesToVector(stemMatches, "binary_stem", vector);
}
 
开发者ID:cgraywang,项目名称:TextHIN,代码行数:27,代码来源:TextToTextMatcher.java

示例4: features

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Featurize a KBP relation mention. Returns an empty counter when the
 * subject/object spans overlap or either span is empty.
 */
public static Counter<String> features(KBPInput input) {
    // Make sure RegexNER annotations are present before featurizing.
    input.sentence.regexner(IntelConfig.Regex_NER_caseless, false);
    input.sentence.regexner(IntelConfig.Regex_NER_cased, true);

    // Degenerate spans yield no features at all.
    if (Span.overlaps(input.subjectSpan, input.objectSpan)
        || input.subjectSpan.size() == 0
        || input.objectSpan.size() == 0) {
        return new ClassicCounter<>();
    }

    // Actually featurize.
    ClassicCounter<String> feats = new ClassicCounter<>();
    denseFeatures(input, input.sentence, feats);
    surfaceFeatures(input, input.sentence, feats);
    dependencyFeatures(input, input.sentence, feats);
    relationSpecificFeatures(input, input.sentence, feats);
    return feats;
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:20,代码来源:KBPStatisticalExtractor.java

示例5: features

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Featurize a KBP relation mention using the default KBP RegexNER models.
 * Returns an empty counter when the subject/object spans overlap or either
 * span is empty.
 */
public static Counter<String> features(KBPInput input) {
  // Make sure RegexNER annotations are present before featurizing.
  input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false);
  input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true);

  // Degenerate spans yield no features at all.
  if (Span.overlaps(input.subjectSpan, input.objectSpan)
      || input.subjectSpan.size() == 0
      || input.objectSpan.size() == 0) {
    return new ClassicCounter<>();
  }

  // Actually featurize.
  ClassicCounter<String> feats = new ClassicCounter<>();
  denseFeatures(input, input.sentence, feats);
  surfaceFeatures(input, input.sentence, feats);
  dependencyFeatures(input, input.sentence, feats);
  relationSpecificFeatures(input, input.sentence, feats);
  return feats;
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:20,代码来源:KBPStatisticalExtractor.java

示例6: getWordDistributionsPerTopic

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Analysis and Debugging: convert per-slot word counts into additively
 * smoothed distributions P(w|topic).
 *
 * @param countsBySlot Per-topic counts keyed by word ID.
 * @param smoothing Additive smoothing mass added to each word's count.
 * @param smoothingTimesNum smoothing times the vocabulary size, added to the denominator.
 * @param wordIndex The index of words to integer IDs.
 * @return One array per topic, indexed by word ID, holding P(w|topic).
 */
private List<double[]> getWordDistributionsPerTopic(ClassicCounter<Integer>[] countsBySlot, double smoothing, double smoothingTimesNum, Index<String> wordIndex) {
  final int vocabSize = wordIndex.size();
  List<double[]> dists = new ArrayList<double[]>(numTopics);
  for( int topic = 0; topic < numTopics; topic++ ) {
    double[] dist = new double[vocabSize];
    dists.add(dist);

    // The denominator is constant for the whole topic; hoist it out of the
    // per-word loop instead of recomputing totalCount() on every iteration.
    double denominator = countsBySlot[topic].totalCount() + smoothingTimesNum;
    for( int ii = 0; ii < vocabSize; ii++ ) {
      dist[ii] = (countsBySlot[topic].getCount(ii) + smoothing) / denominator;
    }
  }
  return dists;
}
 
开发者ID:nchambers,项目名称:probschemas,代码行数:20,代码来源:GibbsSamplerEntities.java

示例7: storeAll

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Snapshot the sampler state. Every array is deep-copied and every counter
 * cloned, so later sampling steps cannot mutate the stored values.
 */
public void storeAll(
    int[][] zs,
    double[] topicCounts, 
    int[][] topicCountsByDoc,
    ClassicCounter<Integer>[] wCountsBySlot, 
    ClassicCounter<Integer>[] verbCountsBySlot,
    ClassicCounter<Integer>[] depCountsBySlot,
    ClassicCounter<Integer>[] featCountsBySlot) {

  // Deep-copy the per-token topic assignments.
  this.zs = new int[zs.length][];
  for( int row = 0; row < zs.length; row++ )
    this.zs[row] = zs[row].clone();

  this.topicCounts = topicCounts.clone();

  // Deep-copy the per-document topic counts.
  this.topicCountsByDoc = new int[topicCountsByDoc.length][];
  for( int row = 0; row < topicCountsByDoc.length; row++ )
    this.topicCountsByDoc[row] = topicCountsByDoc[row].clone();

  // Counters are cloned by a helper defined elsewhere in this class.
  this.wCountsBySlot = cloneCounter(wCountsBySlot);
  this.verbCountsBySlot = cloneCounter(verbCountsBySlot);
  this.depCountsBySlot = cloneCounter(depCountsBySlot);
  this.featCountsBySlot = cloneCounter(featCountsBySlot);
}
 
开发者ID:nchambers,项目名称:probschemas,代码行数:24,代码来源:EntityModelInstance.java

示例8: createEventDocumentTimeDatum

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Build a training datum linking a single event to the document creation time.
 *
 * @param event The event being linked; featurized four different ways below.
 * @param time  The document timestamp (used only for diagnostics here).
 * @param label Gold TLink label for this pair.
 * @param trees Parse trees of the whole document.
 */
public TLinkDatum createEventDocumentTimeDatum(TextEvent event, Timex time, TLink.TYPE label, List<Tree> trees) {
  // Sanity check: a null event is only logged, not handled — the feature
  // calls below would then fail, so this is diagnostic output only.
  if( event == null )
    System.out.println("Null event in createEventDocumentTimeDatum(): " + event + " and " + time);

  Counter<String> feats = new ClassicCounter<String>();
  feats.addAll(getSingleEventPOSFeatures("pos1", event, trees));
  feats.addAll(getSingleEventFeatures(event, trees));          // tense, modality, etc.
  feats.addAll(getSingleEventTokenFeatures(1, event, trees));  // token, lemma, wordnet
  feats.addAll(getSingleEventNearbyBOWFeatures(event, trees)); // +.01 accuracy, very minimal.

  TLinkDatum datum = new TLinkDatum(label);
  datum.addFeatures(feats);
  datum.setType(TLinkDatum.TYPE.EDCT);
  return datum;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:18,代码来源:TLinkFeaturizer.java

示例9: getSingleEventPOSFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Event features using just its event POS tags: the tag at the event token,
 * the two preceding tags, and a bigram of the previous and current tag.
 */
private Counter<String> getSingleEventPOSFeatures(String featprefix, TextEvent event1, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();
  Tree tree = trees.get(event1.sid());
  int idx = event1.index();   // token indices are 1-based

  String current = TreeOperator.indexToPOSTag(tree, idx);
  String prev = TreeOperator.indexToPOSTag(tree, idx - 1);
  String prevPrev = TreeOperator.indexToPOSTag(tree, idx - 2);
  // Patch in sentence-boundary markers when the event sits near the start.
  if( idx == 1 ) {
    prev = "<s>";
    prevPrev = "<pre-s>";
  } else if( idx == 2 ) {
    prevPrev = "<s>";
  }

  feats.incrementCount(featprefix + "-0-" + current);
  feats.incrementCount(featprefix + "-1-" + prev);
  feats.incrementCount(featprefix + "-2-" + prevPrev);
  feats.incrementCount(featprefix + "-bi-" + prev + "-" + current);

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:25,代码来源:TLinkFeaturizer.java

示例10: getPOSFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * All features using just the two events' POS tags: each event's individual
 * POS features plus a POS bigram over the pair.
 */
private Counter<String> getPOSFeatures(TextEvent event1, TextEvent event2, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();

  // Per-event POS context features.
  feats.addAll(getSingleEventPOSFeatures("pos1", event1, trees));
  feats.addAll(getSingleEventPOSFeatures("pos2", event2, trees));

  // Bigram of the two events' own POS tags.
  String firstPOS = TreeOperator.indexToPOSTag(trees.get(event1.sid()), event1.index());
  String secondPOS = TreeOperator.indexToPOSTag(trees.get(event2.sid()), event2.index());
  feats.incrementCount("posBi-" + firstPOS + "-" + secondPOS);

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:19,代码来源:TLinkFeaturizer.java

示例11: getSingleEventFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Create features around each event's labeled attributes (tense, aspect,
 * class, modality, polarity), plus the optional Turker "happened" label.
 */
private Counter<String> getSingleEventFeatures(TextEvent event, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();

  if( !_noEventFeats ) {
    feats.incrementCount("ev1Tense-" + event.getTense());
    feats.incrementCount("ev1Aspect-" + event.getAspect());
    // Modality and polarity are optional attributes; skip blanks.
    String modality = event.getModality();
    if( modality != null && modality.length() > 0 )
      feats.incrementCount("ev1Modality-" + modality);
    feats.incrementCount("ev1Class-" + event.getTheClass());
    String polarity = event.getPolarity();
    if( polarity != null && polarity.length() > 0 )
      feats.incrementCount("ev1Polarity-" + polarity);
  }

  // From Turker experiments; only present when the .info file has them.
  if( _doHappened && event.getHappened() != null )
    feats.incrementCount("ev1Happened-" + event.getHappened());

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:22,代码来源:TLinkFeaturizer.java

示例12: getSingleEventNearbyBOWFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Get the single tokens around the target event within a window size.
 * Emits a lowercased bag-of-words feature for every token within the window,
 * excluding the event token itself.
 * @param event The event to link to the document time.
 * @param trees All the parse trees of the entire document.
 */
private Counter<String> getSingleEventNearbyBOWFeatures(TextEvent event, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();
  Tree tree = trees.get(event.sid());
  List<String> tokens = TreeOperator.stringLeavesFromTree(tree);

  final int window = 2;
  int center = event.index() - 1;  // event.index() is 1-based
  int start = Math.max(0, center - window);
  int end = Math.min(tokens.size() - 1, center + window);

  for( int pos = start; pos <= end; pos++ ) {
    if( pos != center )
      feats.incrementCount("bow-" + tokens.get(pos).toLowerCase());
  }

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:22,代码来源:TLinkFeaturizer.java

示例13: getSingleEventTokenFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Create token/lemma/synset features for an event.
 * @param eventIndex Either 1 or 2, the first or second event in your link. This differentiates the feature names.
 */
private Counter<String> getSingleEventTokenFeatures(int eventIndex, TextEvent event1, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();

  String token = event1.string();
  String postag = TreeOperator.indexToPOSTag(trees.get(event1.sid()), event1.index());
  String lemma = _wordnet.lemmatizeTaggedWord(token, postag);

  // Surface form and lemma.
  feats.incrementCount("token" + eventIndex + "-" + token);
  feats.incrementCount("lemma" + eventIndex + "-" + lemma);

  // First WordNet synset, looked up only for verbs and nouns.
  Synset[] synsets;
  if( postag.startsWith("VB") )
    synsets = _wordnet.synsetsOf(token, POS.VERB);
  else if( postag.startsWith("NN") )
    synsets = _wordnet.synsetsOf(token, POS.NOUN);
  else
    synsets = null;
  if( synsets != null && synsets.length > 0 )
    feats.incrementCount("synset" + eventIndex + "-" + synsets[0].getOffset());

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:27,代码来源:TLinkFeaturizer.java

示例14: getTimexFeatures

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Features for a time expression: its last token (days of week collapsed to
 * a single feature), the whole underscore-joined phrase, and an end-of-sentence flag.
 */
private Counter<String> getTimexFeatures(Timex timex, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();
  List<String> tokens = TreeOperator.stringLeavesFromTree(trees.get(timex.sid()));
  int start = timex.offset() - 1;          // offsets are 1-based
  int end = start + timex.length() - 1;    // inclusive

  // Rightmost token of the time phrase (code reads tokens.get(end)).
  String lastToken = tokens.get(end);
  if( TimebankUtil.isDayOfWeek(lastToken) )
    feats.incrementCount("timetoken-DAYOFWEEK");
  else
    feats.incrementCount("timetoken-" + lastToken);

  // Entire multi-word time phrase, joined with underscores.
  if( timex.length() > 1 ) {
    StringBuilder phrase = new StringBuilder(tokens.get(start));
    for( int pos = start + 1; pos <= end; pos++ )
      phrase.append("_").append(tokens.get(pos));
    feats.incrementCount("timephrase-" + phrase);
  }

  // Is the timex the last phrase in the sentence?
  if( end == tokens.size() - 1 )
    feats.incrementCount("timeEOS");

  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:27,代码来源:TLinkFeaturizer.java

示例15: getEventTimeBigram

import edu.stanford.nlp.stats.ClassicCounter; //导入依赖的package包/类
/**
 * Create one feature string, the bigram of the event word and the rightmost token in the timex phrase.
 * The bigram is ordered by text order.
 */
private Counter<String> getEventTimeBigram(TextEvent event, Timex timex, List<Tree> trees) {
  Counter<String> feats = new ClassicCounter<String>();
  List<String> tokens = TreeOperator.stringLeavesFromTree(trees.get(timex.sid()));
  String timeToken = tokens.get(timex.offset()-1);
  if( TimebankUtil.isDayOfWeek(timeToken) )
    timeToken = "DAYOFWEEK";
  
  if( event.sid() == timex.sid() && event.index() < timex.offset() )
    feats.incrementCount("bi-" + tokens.get(event.index()-1) + "_" + timeToken);
  else if( event.sid() == timex.sid() )
    feats.incrementCount("bi-" + timeToken + "_" + tokens.get(event.index()-1));

  // In different sentences.
  else {
    List<String> eventTokens = TreeOperator.stringLeavesFromTree(trees.get(event.sid()));
    if( event.sid() < timex.sid() )
      feats.incrementCount("bi-" + eventTokens.get(event.index()-1) + "_" + timeToken);
    else
      feats.incrementCount("bi-" + timeToken + "_" + eventTokens.get(event.index()-1));
  }
  
  return feats;
}
 
开发者ID:nchambers,项目名称:schemas,代码行数:28,代码来源:TLinkFeaturizer.java


注:本文中的edu.stanford.nlp.stats.ClassicCounter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。