This article collects typical usage examples of the Java class edu.stanford.nlp.stats.ClassicCounter. If you have been wondering what ClassicCounter is, what it is for, or how to use it, the curated code examples below should help.
The ClassicCounter class belongs to the edu.stanford.nlp.stats package. The 15 code examples below are sorted by popularity by default.
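Before the examples, here is a minimal, self-contained sketch of the core ClassicCounter API (the keys and counts are illustrative only):

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class ClassicCounterDemo {
  public static void main(String[] args) {
    // A ClassicCounter maps keys to double counts; absent keys count as 0.0.
    Counter<String> counts = new ClassicCounter<>();
    counts.incrementCount("dog");         // dog -> 1.0
    counts.incrementCount("dog", 2.0);    // dog -> 3.0
    counts.setCount("cat", 5.0);          // cat -> 5.0
    System.out.println(counts.getCount("dog"));   // 3.0
    System.out.println(counts.getCount("fish"));  // 0.0 for an unseen key
    System.out.println(counts.totalCount());      // 8.0
  }
}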
Example 1: getIDFMapForDocument
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Get an IDF map for the given document string.
*
* @param document The raw text of the document.
* @return A counter of how many times each noun occurs in the document.
*/
private static Counter<String> getIDFMapForDocument(String document) {
// Clean up -- remove some Gigaword patterns that slow things down
// and don't help anything
document = headingSeparator.matcher(document).replaceAll("");
DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(document));
preprocessor.setTokenizerFactory(tokenizerFactory);
Counter<String> idfMap = new ClassicCounter<String>();
for (List<HasWord> sentence : preprocessor) {
if (sentence.size() > MAX_SENTENCE_LENGTH)
continue;
List<TaggedWord> tagged = tagger.tagSentence(sentence);
for (TaggedWord w : tagged) {
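// Keep nouns only. Note the lowercase prefix test assumes the tagger's tagset;
// standard Penn Treebank noun tags are uppercase ("NN", "NNS", ...).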
if (w.tag().startsWith("n"))
idfMap.incrementCount(w.word());
}
}
return idfMap;
}
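A hedged usage sketch for the method above (the caller and the document variable are hypothetical; Counters.toSortedList from edu.stanford.nlp.stats orders keys by descending count):

Counter<String> nounCounts = getIDFMapForDocument(document);
for (String noun : Counters.toSortedList(nounCounts)) {
  System.out.println(noun + "\t" + nounCounts.getCount(noun));
}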
Example 2: existsTokenMatch
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
public boolean existsTokenMatch(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs) {
// generate stems
List<String> exampleStems = new ArrayList<String>();
for (String token : exampleTokens)
exampleStems.add(stemmer.stem(token));
Counter<String> tokenFeatures = new ClassicCounter<String>();
Counter<String> stemFeatures = new ClassicCounter<String>();
for (String fbDescription : fbDescs) {
List<String> fbDescTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription);
List<String> fbDescStems = new ArrayList<>();
for (String fbDescToken : fbDescTokens)
fbDescStems.add(stemmer.stem(fbDescToken));
Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, fbDescTokens, true));
Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, fbDescTokens, true));
Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, fbDescStems, false));
if (tokenFeatures.size() > 0 || stemFeatures.size() > 0)
return true;
}
return false;
}
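This example (and Example 3 below) leans on Counters.maxInPlace which, as the Stanford Counters utility defines it, leaves the first counter holding the element-wise maximum of the two. A small sketch of the effect:

Counter<String> a = new ClassicCounter<>();
a.setCount("x", 1.0);
a.setCount("y", 3.0);
Counter<String> b = new ClassicCounter<>();
b.setCount("y", 2.0);
b.setCount("z", 4.0);
Counters.maxInPlace(a, b);
// a now holds x=1.0, y=3.0 (max of 3.0 and 2.0), z=4.0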
Example 3: extractTokenMatchFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
private void extractTokenMatchFeatures(List<String> exampleTokens, List<String> exampleLemmas, Set<String> fbDescs, FeatureVector vector) {
if (!FeatureExtractor.containsDomain("tokenMatch")) return;
// generate stems
List<String> exampleStems = new ArrayList<>();
for (String token : exampleTokens)
exampleStems.add(stemmer.stem(token));
Counter<String> tokenFeatures = new ClassicCounter<>();
Counter<String> stemFeatures = new ClassicCounter<>();
for (String fbDescription : fbDescs) {
List<String> fbDescTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription);
List<String> fbDescStems = new ArrayList<>();
for (String fbDescToken : fbDescTokens)
fbDescStems.add(stemmer.stem(fbDescToken));
Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, fbDescTokens, true));
Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, fbDescTokens, true));
Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, fbDescStems, false));
}
if (opts.verbose >= 3) {
LogInfo.logs("Binary formula desc: %s, token match: %s, stem match: %s", fbDescs, tokenFeatures, stemFeatures);
}
addFeaturesToVector(tokenFeatures, "binary_token", vector);
addFeaturesToVector(stemFeatures, "binary_stem", vector);
}
Example 4: features
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
public static Counter<String> features(KBPInput input) {
// Ensure RegexNER Tags!
input.sentence.regexner(IntelConfig.Regex_NER_caseless, false);
input.sentence.regexner(IntelConfig.Regex_NER_cased, true);
// Get useful variables
ClassicCounter<String> feats = new ClassicCounter<>();
if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {
return new ClassicCounter<>();
}
// Actually featurize
denseFeatures(input, input.sentence, feats);
surfaceFeatures(input, input.sentence, feats);
dependencyFeatures(input, input.sentence, feats);
relationSpecificFeatures(input, input.sentence, feats);
return feats;
}
Example 5: features
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
public static Counter<String> features(KBPInput input) {
// Ensure RegexNER Tags!
input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false);
input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true);
// Get useful variables
ClassicCounter<String> feats = new ClassicCounter<>();
if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {
return new ClassicCounter<>();
}
// Actually featurize
denseFeatures(input, input.sentence, feats);
surfaceFeatures(input, input.sentence, feats);
dependencyFeatures(input, input.sentence, feats);
relationSpecificFeatures(input, input.sentence, feats);
return feats;
}
Example 6: getWordDistributionsPerTopic
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Analysis and debugging: compute each topic's smoothed word distribution P(w|topic).
* @param countsBySlot Per-topic counters of word IDs.
* @param smoothing The additive smoothing constant.
* @param smoothingTimesNum The smoothing constant times the vocabulary size (added to the denominator).
* @param wordIndex The index mapping words to integer IDs.
*/
private List<double[]> getWordDistributionsPerTopic(ClassicCounter<Integer>[] countsBySlot, double smoothing, double smoothingTimesNum, Index<String> wordIndex) {
// System.out.println("Calling getWordDistPerTopic...wordIndex size " + wordIndex.size());
List<double[]> dists = new ArrayList<double[]>(numTopics);
for( int topic = 0; topic < numTopics; topic++ ) {
double[] dist = new double[wordIndex.size()];
dists.add(dist);
for( int ii = 0; ii < wordIndex.size(); ii++ ) {
double probOfWGivenTopic = (countsBySlot[topic].getCount(ii) + smoothing) / (countsBySlot[topic].totalCount() + smoothingTimesNum);
// System.out.println("P(w=" + wordIndex.get(ii) + "|slot=" + topic + ") \t= " + probOfWGivenTopic);
dist[ii] = probOfWGivenTopic;
}
}
return dists;
}
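The loop applies additive (Laplace-style) smoothing: P(w|topic) = (count(w, topic) + smoothing) / (totalCount(topic) + smoothingTimesNum). When smoothingTimesNum equals smoothing times wordIndex.size(), each per-topic array is a proper probability distribution. A quick sanity check (a sketch, assuming every counted word ID falls inside wordIndex):

List<double[]> dists = getWordDistributionsPerTopic(countsBySlot, 0.1, 0.1 * wordIndex.size(), wordIndex);
double sum = 0.0;
for (double p : dists.get(0)) sum += p;
System.out.println(sum);  // ~1.0: each topic's distribution sums to one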
Example 7: storeAll
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
public void storeAll(
int[][] zs,
double[] topicCounts,
int[][] topicCountsByDoc,
ClassicCounter<Integer>[] wCountsBySlot,
ClassicCounter<Integer>[] verbCountsBySlot,
ClassicCounter<Integer>[] depCountsBySlot,
ClassicCounter<Integer>[] featCountsBySlot) {
this.zs = new int[zs.length][];
for( int xx = 0; xx < zs.length; xx++ )
this.zs[xx] = Arrays.copyOf(zs[xx], zs[xx].length);
this.topicCounts = Arrays.copyOf(topicCounts, topicCounts.length);
this.topicCountsByDoc = new int[topicCountsByDoc.length][];
for( int xx = 0; xx < topicCountsByDoc.length; xx++ )
this.topicCountsByDoc[xx] = Arrays.copyOf(topicCountsByDoc[xx], topicCountsByDoc[xx].length);
this.wCountsBySlot = cloneCounter(wCountsBySlot);
this.verbCountsBySlot = cloneCounter(verbCountsBySlot);
this.depCountsBySlot = cloneCounter(depCountsBySlot);
this.featCountsBySlot = cloneCounter(featCountsBySlot);
}
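The cloneCounter helper is not part of this excerpt; a plausible sketch (an assumption, not the original code) uses ClassicCounter's copy constructor:

@SuppressWarnings("unchecked")
private ClassicCounter<Integer>[] cloneCounter(ClassicCounter<Integer>[] counters) {
  ClassicCounter<Integer>[] copy = new ClassicCounter[counters.length];
  for (int i = 0; i < counters.length; i++)
    copy[i] = new ClassicCounter<>(counters[i]);  // the copy constructor duplicates all counts
  return copy;
}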
Example 8: createEventDocumentTimeDatum
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
public TLinkDatum createEventDocumentTimeDatum(TextEvent event, Timex time, TLink.TYPE label, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
// Sanity check
if( event == null )
System.out.println("Null event in createEventDocumentTimeDatum(): " + event + " and " + time);
feats.addAll(getSingleEventPOSFeatures("pos1", event, trees));
feats.addAll(getSingleEventFeatures(event, trees)); // tense, modality, etc.
feats.addAll(getSingleEventTokenFeatures(1, event, trees)); // token, lemma, wordnet
feats.addAll(getSingleEventNearbyBOWFeatures(event, trees)); // +.01 accuracy, very minimal.
TLinkDatum datum = new TLinkDatum(label);
datum.addFeatures(feats);
datum.setType(TLinkDatum.TYPE.EDCT);
return datum;
}
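Note that Counter.addAll, used above to merge the feature groups, sums overlapping counts rather than taking an element-wise maximum (contrast Counters.maxInPlace in Examples 2 and 3):

Counter<String> a = new ClassicCounter<>();
a.setCount("f", 1.0);
Counter<String> b = new ClassicCounter<>();
b.setCount("f", 2.0);
a.addAll(b);  // f -> 3.0: overlapping counts are summed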
Example 9: getSingleEventPOSFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Event features using just the POS tags at and immediately before the event token.
*/
private Counter<String> getSingleEventPOSFeatures(String featprefix, TextEvent event1, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
Tree tree1 = trees.get(event1.sid());
String pos10 = TreeOperator.indexToPOSTag(tree1, event1.index());
String pos11 = TreeOperator.indexToPOSTag(tree1, event1.index()-1);
String pos12 = TreeOperator.indexToPOSTag(tree1, event1.index()-2);
if( event1.index() == 2 ) {
pos12 = "<s>";
} else if( event1.index() == 1 ) {
pos11 = "<s>";
pos12 = "<pre-s>";
}
feats.incrementCount(featprefix + "-0-" + pos10);
feats.incrementCount(featprefix + "-1-" + pos11);
feats.incrementCount(featprefix + "-2-" + pos12);
feats.incrementCount(featprefix + "-bi-" + pos11 + "-" + pos10);
return feats;
}
Example 10: getPOSFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* All features using just the two events' POS tags, including a POS bigram of the pair.
*/
private Counter<String> getPOSFeatures(TextEvent event1, TextEvent event2, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
feats.addAll(getSingleEventPOSFeatures("pos1", event1, trees));
feats.addAll(getSingleEventPOSFeatures("pos2", event2, trees));
// bigram
Tree tree1 = trees.get(event1.sid());
Tree tree2 = trees.get(event2.sid());
String pos10 = TreeOperator.indexToPOSTag(tree1, event1.index());
String pos20 = TreeOperator.indexToPOSTag(tree2, event2.index());
feats.incrementCount("posBi-" + pos10 + "-" + pos20);
return feats;
}
Example 11: getSingleEventFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Create features from the event's labeled attributes (tense, aspect, modality, class, polarity).
*/
private Counter<String> getSingleEventFeatures(TextEvent event, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
if( !_noEventFeats ) {
feats.incrementCount("ev1Tense-" + event.getTense());
feats.incrementCount("ev1Aspect-" + event.getAspect());
if( event.getModality() != null && event.getModality().length() > 0 ) feats.incrementCount("ev1Modality-" + event.getModality());
feats.incrementCount("ev1Class-" + event.getTheClass());
if( event.getPolarity() != null && event.getPolarity().length() > 0 ) feats.incrementCount("ev1Polarity-" + event.getPolarity());
}
// These are from Turker experiments. Will include if the .info file has them!
if( _doHappened && event.getHappened() != null ) {
feats.incrementCount("ev1Happened-" + event.getHappened());
}
return feats;
}
Example 12: getSingleEventNearbyBOWFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Get the individual tokens around the target event, within a fixed window.
* @param event The event to link to the document time.
* @param trees All the parse trees of the entire document.
*/
private Counter<String> getSingleEventNearbyBOWFeatures(TextEvent event, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
Tree tree = trees.get(event.sid());
List<String> tokens = TreeOperator.stringLeavesFromTree(tree);
int window = 2;
int start = Math.max(0, event.index()-1-window);
int end = Math.min(tokens.size()-1, event.index()-1+window);
for( int xx = 0; xx < window; xx++ ) {
if( start+xx < event.index()-1 ) feats.incrementCount("bow-" + tokens.get(start+xx).toLowerCase());
if( end-xx > event.index()-1 ) feats.incrementCount("bow-" + tokens.get(end-xx).toLowerCase());
}
return feats;
}
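A worked trace of the window logic above, for a hypothetical six-token sentence:

// tokens = [The, storm, hit, the, coast, .], event.index() = 3 (1-based index of "hit")
// start = max(0, 3 - 1 - 2) = 0, end = min(5, 3 - 1 + 2) = 4
// xx = 0: adds "bow-the" (tokens[0]) and "bow-coast" (tokens[4])
// xx = 1: adds "bow-storm" (tokens[1]) and "bow-the" (tokens[3])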
Example 13: getSingleEventTokenFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Create token/lemma/synset features for an event.
* @param eventIndex Either 1 or 2, the first or second event in your link. This differentiates the feature names.
*/
private Counter<String> getSingleEventTokenFeatures(int eventIndex, TextEvent event1, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
String token = event1.string();
String postag = TreeOperator.indexToPOSTag(trees.get(event1.sid()), event1.index());
String lemma = _wordnet.lemmatizeTaggedWord(token, postag);
// Token and Lemma
feats.incrementCount("token" + eventIndex + "-" + token);
feats.incrementCount("lemma" + eventIndex + "-" + lemma);
// WordNet synset
Synset[] synsets = null;
if( postag.startsWith("VB") )
synsets = _wordnet.synsetsOf(token, POS.VERB);
else if( postag.startsWith("NN") )
synsets = _wordnet.synsetsOf(token, POS.NOUN);
if( synsets != null && synsets.length > 0 )
feats.incrementCount("synset" + eventIndex + "-" + synsets[0].getOffset());
return feats;
}
Example 14: getTimexFeatures
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
private Counter<String> getTimexFeatures(Timex timex, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
List<String> tokens = TreeOperator.stringLeavesFromTree(trees.get(timex.sid()));
int start = timex.offset()-1;
int end = start + timex.length()-1; // inclusive
// Rightmost (last) token of the time phrase.
if( TimebankUtil.isDayOfWeek(tokens.get(end)) )
feats.incrementCount("timetoken-DAYOFWEEK");
else
feats.incrementCount("timetoken-" + tokens.get(end));
// Entire time phrase.
if( timex.length() > 1 ) {
String phrase = tokens.get(start);
for( int xx = 1; xx < timex.length(); xx++ )
phrase += "_" + tokens.get(start+xx);
feats.incrementCount("timephrase-" + phrase);
}
// Is the timex the last phrase in the sentence?
if( tokens.size()-1 == end )
feats.incrementCount("timeEOS");
return feats;
}
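A worked trace with a hypothetical three-token timex:

// timex = "next Monday morning", offset() = 5 (1-based), length() = 3
// start = 4, end = 6; the time-token feature reads tokens.get(6) = "morning"
// isDayOfWeek("morning") is false, so "timetoken-morning" is added
// the phrase feature is "timephrase-next_Monday_morning"
// "timeEOS" fires only if "morning" ends the sentence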
Example 15: getEventTimeBigram
import edu.stanford.nlp.stats.ClassicCounter; // import the required package/class
/**
* Create one feature string: the bigram of the event word and the first token of the timex phrase.
* The bigram is ordered by text order.
*/
private Counter<String> getEventTimeBigram(TextEvent event, Timex timex, List<Tree> trees) {
Counter<String> feats = new ClassicCounter<String>();
List<String> tokens = TreeOperator.stringLeavesFromTree(trees.get(timex.sid()));
String timeToken = tokens.get(timex.offset()-1);
if( TimebankUtil.isDayOfWeek(timeToken) )
timeToken = "DAYOFWEEK";
if( event.sid() == timex.sid() && event.index() < timex.offset() )
feats.incrementCount("bi-" + tokens.get(event.index()-1) + "_" + timeToken);
else if( event.sid() == timex.sid() )
feats.incrementCount("bi-" + timeToken + "_" + tokens.get(event.index()-1));
// In different sentences.
else {
List<String> eventTokens = TreeOperator.stringLeavesFromTree(trees.get(event.sid()));
if( event.sid() < timex.sid() )
feats.incrementCount("bi-" + eventTokens.get(event.index()-1) + "_" + timeToken);
else
feats.incrementCount("bi-" + timeToken + "_" + eventTokens.get(event.index()-1));
}
return feats;
}