This article collects typical usage examples of the Java method edu.stanford.nlp.ling.tokensregex.TokenSequencePattern.getMatcher. If you are wondering what TokenSequencePattern.getMatcher does, how to use it, or where to find example code, the curated samples below may help. You can also read more about the enclosing class, edu.stanford.nlp.ling.tokensregex.TokenSequencePattern.
The following shows 6 code examples of TokenSequencePattern.getMatcher, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
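Before the examples, here is a minimal, self-contained sketch of the compile → getMatcher → find loop that all six snippets share. It uses only the standard CoreNLP pipeline classes; the class name GetMatcherDemo, the sample sentence, and the pattern string are illustrative and not taken from the examples below.
import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class GetMatcherDemo {
    public static void main(String[] args) {
        // Tokenize and POS-tag a sentence so each token carries a tag() value
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation doc = new Annotation("The quick brown fox jumps over the lazy dog.");
        pipeline.annotate(doc);
        List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);

        // Illustrative pattern: one or more adjectives followed by a noun
        TokenSequencePattern pattern = TokenSequencePattern.compile("([{tag:/JJ.*/}]+ [{tag:/NN.*/}])");
        TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
        while (matcher.find()) {
            // groupNodes() returns the matched tokens as a List<CoreMap>; wrapping one
            // in a CoreLabel (as the examples below do) exposes index(), tag(), ner(), etc.
            List<CoreMap> nodes = matcher.groupNodes();
            CoreLabel first = new CoreLabel(nodes.get(0));
            System.out.println("Matched \"" + matcher.group() + "\" starting at token " + first.index());
        }
    }
}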
Example 1: pushInfinitiveVerb
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/**
 * Given an object phrase, check whether it has infinitive verbs modifying a noun phrase or a named entity,
 * starting at the first word of the object.
 * @param object the object phrase
 * @return true if such an infinitive construction is found, false otherwise
 */
public boolean pushInfinitiveVerb(Phrase object) {
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_TO_VB_NP_NER);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList());
    while (tMatcher.find()) {
        CoreLabel firstWordMatch = new CoreLabel(tMatcher.groupNodes().get(0));
        if (firstWordMatch.index() == object.getWordList().get(0).index()) {
            return true;
        }
    }
    return false;
}
Example 2: pushAdverb
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/**
 * Checks whether the adverb(s) from the object should be pushed to the relation (i.e. whether the adverb is
 * followed by a preposition or "to").
 * @param object a phrase, the object of the proposition
 * @return true if an adverb is followed by a preposition or "to", false otherwise
 */
public boolean pushAdverb(Phrase object) {
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_RB_OPT_IN_TO_OPT);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList());
    while (tMatcher.find()) {
        CoreLabel firstWordMatch = new CoreLabel(tMatcher.groupNodes().get(0));
        if (firstWordMatch.index() == object.getWordList().get(0).index() &&
                object.getWordList().get(0).ner().equals(NE_TYPE.NO_NER)) {
            return true;
        }
    }
    return false;
}
Example 3: generateNounPhraseFreqCandidates
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/** Generate candidates for each noun phrase within the phrase **/
public void generateNounPhraseFreqCandidates() {
    SubConstituent sc = new SubConstituent(this.sg);
    // Generate candidates for [DT|RB|JJ]+ NN+
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_DT_RB_JJ_PR_NN);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    this.generateCandidatesFromTokenRegexMatch(tMatcher, sc);
}
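The REGEX constants used throughout these examples (for instance T_DT_RB_JJ_PR_NN above) are project-specific pattern strings that this page does not show. Purely as an illustration of how a "[DT|RB|JJ]+ NN+" sequence could be written in TokensRegex syntax, and not necessarily the pattern the project actually uses, such a constant might look like this:
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;

public class PatternSketch {
    // Hypothetical stand-in for REGEX.T_DT_RB_JJ_PR_NN: one or more
    // determiner/adverb/adjective/pronoun tokens followed by one or more nouns.
    // Illustrative only; the project's real pattern string may differ.
    static final TokenSequencePattern DT_RB_JJ_PR_NN =
            TokenSequencePattern.compile("([{tag:/DT|RB.?|JJ.?|PRP.?/}]+ [{tag:/NN.*/}]+)");
}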
Example 4: detectQuantities
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/**
 * Detect the quantities in a phrase (given the sentence semantic graph).
 * @param sentSemGraph the sentence semantic graph
 * @param i selects the token-regex pattern: 1 for REGEX.QUANTITY_SEQUENCE, otherwise REGEX.QUANTITY_SEQUENCE_WITH_NO
 */
public void detectQuantities(SemanticGraph sentSemGraph, int i) {
    // Quantity words and edges
    ObjectArrayList<IndexedWord> qWords = new ObjectArrayList<IndexedWord>();
    ObjectArrayList<SemanticGraphEdge> qEdges = new ObjectArrayList<SemanticGraphEdge>();
    // Tokens regex patterns
    String tokenRegexPattern;
    if (i == 1)
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE;
    else
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE_WITH_NO;
    TokenSequencePattern tPattern = TokenSequencePattern.compile(tokenRegexPattern);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());
    // Some reusable variables
    List<CoreMap> matchCoreMaps;
    ObjectOpenHashSet<IndexedWord> wordsSet = new ObjectOpenHashSet<>();
    IndexedWord head;
    Set<SemanticGraphEdge> subtreeedges = new HashSet<>();
    int matchCounter = -1;
    // Annotate the matches and their subtrees
    while (tMatcher.find()) {
        matchCounter++;
        matchCoreMaps = tMatcher.groupNodes();
        // Get the head word of the phrase and see whether or not to add it to the quantities
        head = CoreNLPUtils.getRootFromCoreMapWordList(sentSemGraph, matchCoreMaps);
        if (head.ner().equals(NE_TYPE.DATE) || head.ner().equals(NE_TYPE.LOCATION) ||
                head.ner().equals(NE_TYPE.MISC) || head.ner().equals(NE_TYPE.ORGANIZATION) ||
                head.ner().equals(NE_TYPE.PERSON) || head.ner().equals(NE_TYPE.TIME))
            continue;
        // Add the subtree elements of the head word if the right relations are in force
        for (IndexedWord w : sentSemGraph.getChildren(head)) {
            if ((sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.QUANTIFIER_MODIFIER) ||
                    (sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.ADVERBIAL_MODIFIER)) {
                wordsSet.add(w);
                subtreeedges = CoreNLPUtils.getSubTreeEdges(w, sentSemGraph, null);
            }
        }
        // Add the quantity words found and annotate them within the phrase
        wordsSet.addAll(CoreNLPUtils.getWordSetFromCoreMapList(matchCoreMaps));
        wordsSet.addAll(CoreNLPUtils.getSortedWordsFromListOfEdges(subtreeedges));
        wordsSet.retainAll(this.getWordList());
        qWords = CoreNLPUtils.getSortedWordsFromSetOfWords(wordsSet);
        if (qWords.isEmpty())
            continue;
        this.setQuantitiesFromWordList(qWords.clone(), qEdges, sentSemGraph, i, matchCounter);
        // Reset
        qWords.clear();
        wordsSet.clear();
    }
}
Example 5: mergeAdjacentQuantities
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/**
* When there are already annotated quantities, merge the ones which are right next to each other in a sequence.
*/
public void mergeAdjacentQuantities() {
    // Reusable variables
    ObjectArrayList<IndexedWord> mergedQuantityWords = new ObjectArrayList<>();
    ObjectArrayList<SemanticGraphEdge> mergedEdges = new ObjectArrayList<>();
    ObjectArrayList<String> qIds = new ObjectArrayList<>();
    ObjectOpenHashSet<IndexedWord> remWords = new ObjectOpenHashSet<>();
    ObjectArrayList<IndexedWord> matches = new ObjectArrayList<>();
    // Token regex pattern and matcher
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.ADJACENT_QUANTITIES);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());
    // Merge the quantities when matched
    while (tMatcher.find()) {
        // Get the merged words and edges from the quantities that should be merged.
        matches = CoreNLPUtils.getWordListFromCoreMapList(tMatcher.groupNodes());
        for (int i = 0; i < matches.size(); i++) {
            // If there is a preposition bridging two quantities, add it to the mergedQuantityWords list
            if (matches.get(i).tag().equals(POS_TAG.IN)) {
                mergedQuantityWords.add(matches.get(i));
                remWords.add(matches.get(i));
            }
            // Merge the adjacent quantities
            for (Quantity q : this.getQuantities()) {
                if ((Quantity.ST_QUANT + CHARACTER.UNDERSCORE + q.getId()).equals(matches.get(i).word())) {
                    qIds.add(q.getId());
                    mergedQuantityWords.addAll(q.getQuantityWords());
                    mergedEdges.addAll(q.getQuantityEdges());
                }
            }
        }
        // Add all the words and edges from the merged quantities to the first one and remove the rest
        for (int i = 0; i < this.getWordList().size(); i++) {
            if (this.getWordList().get(i).word().equals(Quantity.ST_QUANT + CHARACTER.UNDERSCORE + qIds.get(0))) {
                if (this.getQuantityByID(qIds.get(0)) != null) {
                    this.getQuantityByID(qIds.get(0)).setWords(mergedQuantityWords);
                    this.getQuantityByID(qIds.get(0)).setEdges(mergedEdges);
                    for (int j = 1; j < qIds.size(); j++) {
                        this.removeQuantityByID(qIds.get(j));
                        for (int k = i; k < this.getWordList().size(); k++) {
                            if (this.getWordList().get(k).word().equals(Quantity.ST_QUANT + CHARACTER.UNDERSCORE +
                                    qIds.get(j))) {
                                remWords.add(this.getWordList().get(k));
                                continue;
                            }
                        }
                    }
                    break;
                }
            }
        }
        // Remove and clear
        this.removeWordsFromList(remWords);
        remWords.clear();
        qIds.clear();
    }
}
Example 6: processPoss
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; // import the package/class the method depends on
/**
 * Process possessives in the object.
 * If we have ("SUBJ", "REL", "NP_1 POS NP_2"), rewrite it as ("SUBJ", "REL + NP_1 + of", "NP_2").
 * @param prop the proposition (a list of annotated phrases)
 */
public void processPoss(ObjectArrayList<AnnotatedPhrase> prop) {
    // If there's no object (clause type SV), return
    if (prop.size() < 3)
        return;
    AnnotatedPhrase object = prop.get(2);
    AnnotatedPhrase rel = prop.get(1);
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_NP_POS_NP);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList());
    int posIndex = -1;
    while (tMatcher.find()) {
        List<CoreMap> match = tMatcher.groupNodes();
        // Check if the first/last word of the match is the first/last word of the object
        CoreLabel firstWord = new CoreLabel(match.get(0));
        CoreLabel lastWord = new CoreLabel(match.get(match.size() - 1));
        boolean check = false;
        if (firstWord.index() == object.getWordList().get(0).index()) {
            if (lastWord.index() == object.getWordList().get(object.getWordList().size() - 1).index()) {
                check = true;
            }
        }
        if (!check) break;
        for (CoreMap cm : match) {
            CoreLabel cl = new CoreLabel(cm);
            if (cl.tag().equals(POS_TAG.POS) && (cl.ner().equals(NE_TYPE.NO_NER))) {
                posIndex = object.getWordCoreLabelList().indexOf(cl);
                break;
            }
        }
    }
    if (posIndex > -1) {
        IndexedWord of = new IndexedWord();
        of.setOriginalText("of");
        of.setLemma("of");
        of.setWord("of");
        of.setTag("IN");
        of.setNER("O");
        of.setIndex(-1);
        ObjectArrayList<IndexedWord> pushedWords = new ObjectArrayList<>();
        object.removeWordFromList(posIndex);
        for (int i = posIndex; i < object.getWordList().size(); i++) {
            pushedWords.add(object.getWordList().get(i));
        }
        rel.addWordsToList(pushedWords);
        rel.addWordToList(of);
        object.removeWordsFromList(pushedWords);
    }
}