当前位置: 首页>>代码示例>>Java>>正文


Java TokenSequencePattern.getMatcher方法代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.tokensregex.TokenSequencePattern.getMatcher方法的典型用法代码示例。如果您正苦于以下问题:Java TokenSequencePattern.getMatcher方法的具体用法?Java TokenSequencePattern.getMatcher怎么用?Java TokenSequencePattern.getMatcher使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在edu.stanford.nlp.ling.tokensregex.TokenSequencePattern的用法示例。


在下文中一共展示了TokenSequencePattern.getMatcher方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: pushInfinitiveVerb

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * Tells whether an object phrase starts with an infinitive verb that modifies a noun phrase
 * or a named entity.
 *
 * The phrase's core labels are scanned with the token pattern {@code REGEX.T_TO_VB_NP_NER}.
 * A match only counts when its first token carries the same index as the first word of the
 * object, i.e. when the match begins at the very start of the object phrase.
 *
 * @param object the object phrase to inspect
 * @return {@code true} if some match starts at the first word of the object, {@code false} otherwise
 */
public boolean pushInfinitiveVerb(Phrase object){
    TokenSequenceMatcher matcher = TokenSequencePattern
            .compile(REGEX.T_TO_VB_NP_NER)
            .getMatcher(object.getWordCoreLabelList());

    // Keep scanning until a match anchored at the head of the object is found
    boolean anchoredAtObjectStart = false;
    while (!anchoredAtObjectStart && matcher.find()){
        CoreLabel matchHead = new CoreLabel(matcher.groupNodes().get(0));
        anchoredAtObjectStart = matchHead.index() == object.getWordList().get(0).index();
    }

    return anchoredAtObjectStart;
}
 
开发者ID:gkiril,项目名称:minie,代码行数:20,代码来源:MinIE.java

示例2: pushAdverb

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * Decides whether the adverb(s) at the start of the object should be pushed to the relation,
 * which is the case when the adverb is followed by a preposition or by "to".
 *
 * The phrase's core labels are scanned with the token pattern {@code REGEX.T_RB_OPT_IN_TO_OPT}.
 * A match qualifies only if it begins at the first word of the object and that first word is
 * not part of a named entity (its NER tag equals {@code NE_TYPE.NO_NER}).
 *
 * @param object a phrase, the object of the proposition
 * @return {@code true} if a qualifying match is found, {@code false} otherwise
 */
public boolean pushAdverb(Phrase object){
    TokenSequenceMatcher matcher = TokenSequencePattern
            .compile(REGEX.T_RB_OPT_IN_TO_OPT)
            .getMatcher(object.getWordCoreLabelList());

    while (matcher.find()){
        CoreLabel matchHead = new CoreLabel(matcher.groupNodes().get(0));

        // Matches that do not start at the head of the object are of no interest
        if (matchHead.index() != object.getWordList().get(0).index()){
            continue;
        }

        // The head of the object must not belong to a named entity
        if (object.getWordList().get(0).ner().equals(NE_TYPE.NO_NER)){
            return true;
        }
    }

    return false;
}
 
开发者ID:gkiril,项目名称:minie,代码行数:19,代码来源:MinIE.java

示例3: generateNounPhraseFreqCandidates

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * Generates candidates for every noun phrase found inside this object's phrase.
 *
 * Noun phrases are located by running the token pattern {@code REGEX.T_DT_RB_JJ_PR_NN} over
 * the phrase's core labels; the resulting matcher is handed, together with a sub-constituent
 * built from this object's semantic graph, to {@code generateCandidatesFromTokenRegexMatch}.
 */
public void generateNounPhraseFreqCandidates(){
    SubConstituent subConstituent = new SubConstituent(this.sg);

    // Matcher over the phrase tokens for the noun-phrase pattern
    TokenSequenceMatcher nounPhraseMatcher = TokenSequencePattern
            .compile(REGEX.T_DT_RB_JJ_PR_NN)
            .getMatcher(this.phrase.getWordCoreLabelList());

    this.generateCandidatesFromTokenRegexMatch(nounPhraseMatcher, subConstituent);
}
 
开发者ID:gkiril,项目名称:minie,代码行数:10,代码来源:FrequencyCandidates.java

示例4: detectQuantities

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * Detects the quantities in this phrase (given the sentence semantic graph) and annotates them.
 *
 * Every token-regex match is treated as one quantity candidate. The candidate is skipped when
 * the head word of the match is tagged as a DATE, LOCATION, MISC, ORGANIZATION, PERSON or TIME
 * named entity. Otherwise the matched words, the head's quantifier/adverbial modifier children
 * and the words of those children's subtrees are collected, restricted to words of this phrase,
 * and registered through {@code setQuantitiesFromWordList}.
 *
 * @param sentSemGraph the sentence semantic graph
 * @param i selects the token pattern: {@code 1} uses {@code REGEX.QUANTITY_SEQUENCE}, any other
 *          value uses {@code REGEX.QUANTITY_SEQUENCE_WITH_NO}; it is also forwarded unchanged to
 *          {@code setQuantitiesFromWordList}
 */
public void detectQuantities(SemanticGraph sentSemGraph, int i){
    // Quantity edges passed on to setQuantitiesFromWordList (nothing is added to it here)
    ObjectArrayList<SemanticGraphEdge> qEdges = new ObjectArrayList<SemanticGraphEdge>();

    // Tokens regex pattern
    String tokenRegexPattern;
    if (i == 1) {
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE;
    } else {
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE_WITH_NO;
    }

    TokenSequencePattern tPattern = TokenSequencePattern.compile(tokenRegexPattern);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());

    // Counts every match, including the skipped ones, so the counter stays aligned with the matcher
    int matchCounter = -1;

    // Annotate the matches and their subtrees
    while (tMatcher.find()){
        matchCounter++;
        List<CoreMap> matchCoreMaps = tMatcher.groupNodes();

        // Get the head word of the match and see whether or not to add it to the quantities
        IndexedWord head = CoreNLPUtils.getRootFromCoreMapWordList(sentSemGraph, matchCoreMaps);
        if (head.ner().equals(NE_TYPE.DATE) || head.ner().equals(NE_TYPE.LOCATION) ||
                head.ner().equals(NE_TYPE.MISC) || head.ner().equals(NE_TYPE.ORGANIZATION) ||
                head.ner().equals(NE_TYPE.PERSON) || head.ner().equals(NE_TYPE.TIME)) {
            continue;
        }

        // Per-match state. Keeping these local guarantees that nothing collected for an earlier
        // match (in particular subtree edges) can leak into the current one.
        ObjectOpenHashSet<IndexedWord> wordsSet = new ObjectOpenHashSet<>();
        Set<SemanticGraphEdge> subtreeEdges = new HashSet<>();

        // Add the subtree elements of the head word if the right relations are in force.
        // Edges are accumulated so that every qualifying child contributes its subtree,
        // not only the last one visited.
        for (IndexedWord w: sentSemGraph.getChildren(head)){
            if ((sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.QUANTIFIER_MODIFIER) ||
                (sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.ADVERBIAL_MODIFIER)){
                wordsSet.add(w);
                subtreeEdges.addAll(CoreNLPUtils.getSubTreeEdges(w, sentSemGraph, null));
            }
        }

        // Add the quantity words found and keep only those that belong to this phrase
        wordsSet.addAll(CoreNLPUtils.getWordSetFromCoreMapList(matchCoreMaps));
        wordsSet.addAll(CoreNLPUtils.getSortedWordsFromListOfEdges(subtreeEdges));
        wordsSet.retainAll(this.getWordList());

        ObjectArrayList<IndexedWord> qWords = CoreNLPUtils.getSortedWordsFromSetOfWords(wordsSet);
        if (qWords.isEmpty()) {
            continue;
        }

        // Hand over a copy so the registered quantity owns its word list
        this.setQuantitiesFromWordList(qWords.clone(), qEdges, sentSemGraph, i, matchCounter);
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:62,代码来源:AnnotatedPhrase.java

示例5: mergeAdjacentQuantities

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * When there are already annotated quantities, merges the ones which are right next to each
 * other in a sequence.
 *
 * For every match of {@code REGEX.ADJACENT_QUANTITIES} over this phrase's core labels, the
 * words and edges of all quantities referenced in the match (plus any bridging preposition)
 * are gathered and assigned to the first quantity of the match. The remaining quantities of
 * the match are removed, and their placeholder words, together with the bridging prepositions,
 * are removed from the word list.
 */
public void mergeAdjacentQuantities(){
    // Token regex pattern and matcher
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.ADJACENT_QUANTITIES);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());

    // Merge the quantities when matched
    while (tMatcher.find()){
        // Fresh containers for every match: the merged lists are handed to the first quantity
        // via setWords/setEdges, so reusing them would both carry words of an earlier merge
        // into this one and keep mutating lists that were already handed over.
        ObjectArrayList<IndexedWord> mergedQuantityWords = new ObjectArrayList<>();
        ObjectArrayList<SemanticGraphEdge> mergedEdges = new ObjectArrayList<>();
        ObjectArrayList<String> qIds = new ObjectArrayList<>();
        ObjectOpenHashSet<IndexedWord> remWords = new ObjectOpenHashSet<>();

        // Get the merged words and edges from the quantities that should be merged.
        ObjectArrayList<IndexedWord> matches = CoreNLPUtils.getWordListFromCoreMapList(tMatcher.groupNodes());

        for (int i = 0; i < matches.size(); i++){
            IndexedWord matched = matches.get(i);

            // If it is a preposition bridging two quantities, add that very token to the
            // merged words and schedule it for removal from the phrase
            if (matched.tag().equals(POS_TAG.IN)) {
                mergedQuantityWords.add(matched);
                remWords.add(matched);
            }

            // Merge the adjacent quantities
            for (Quantity q: this.getQuantities()){
                if ((Quantity.ST_QUANT + CHARACTER.UNDERSCORE + q.getId()).equals(matched.word())){
                    qIds.add(q.getId());
                    mergedQuantityWords.addAll(q.getQuantityWords());
                    mergedEdges.addAll(q.getQuantityEdges());
                }
            }
        }

        // No known quantity is referenced by this match: there is nothing to merge
        if (qIds.isEmpty()){
            continue;
        }

        String firstId = qIds.get(0);
        String firstPlaceholder = Quantity.ST_QUANT + CHARACTER.UNDERSCORE + firstId;

        // Add all the words and edges from the merged quantities to the first one and remove the rest
        for (int i = 0; i < this.getWordList().size(); i++){
            if (!this.getWordList().get(i).word().equals(firstPlaceholder)){
                continue;
            }
            if (this.getQuantityByID(firstId) == null){
                continue;
            }

            this.getQuantityByID(firstId).setWords(mergedQuantityWords);
            this.getQuantityByID(firstId).setEdges(mergedEdges);

            for (int j = 1; j < qIds.size(); j++){
                this.removeQuantityByID(qIds.get(j));

                // Placeholder words of the absorbed quantities are looked up from the position
                // of the first quantity's placeholder onwards
                String absorbedPlaceholder = Quantity.ST_QUANT + CHARACTER.UNDERSCORE + qIds.get(j);
                for (int k = i; k < this.getWordList().size(); k++){
                    if (this.getWordList().get(k).word().equals(absorbedPlaceholder)){
                        remWords.add(this.getWordList().get(k));
                    }
                }
            }
            break;
        }

        // Remove the absorbed placeholders and bridging prepositions from the phrase
        this.removeWordsFromList(remWords);
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:65,代码来源:AnnotatedPhrase.java

示例6: processPoss

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入方法依赖的package包/类
/**
 * Processes possessives in the object of a proposition.
 *
 * The object is scanned with the token pattern {@code REGEX.T_NP_POS_NP}. Scanning stops at
 * the first match that does not cover the object from its first to its last word. Inside a
 * covering match, the first token tagged {@code POS_TAG.POS} whose NER tag is
 * {@code NE_TYPE.NO_NER} is taken as the possessive marker, and its position in the object's
 * core-label list is recorded.
 *
 * If a marker was found, it is removed from the object, every word that followed it is moved
 * to the end of the relation, and a synthetic "of" word is appended to the relation. In effect
 * ("SUBJ", "REL", "NP_1 POS NP_2") becomes ("SUBJ", "REL + NP_2 + of", "NP_1").
 *
 * NOTE(review): the marker position is looked up in {@code getWordCoreLabelList()} but used
 * against {@code getWordList()}; this presumes both lists are index-aligned - confirm.
 *
 * @param prop proposition (list of annotated phrases); index 1 is read as the relation and
 *             index 2 as the object. Nothing happens when it holds fewer than three phrases.
 */
public void processPoss(ObjectArrayList<AnnotatedPhrase> prop){
    // Without an object (clause type SV) there is nothing to process
    if (prop.size() < 3){
        return;
    }

    AnnotatedPhrase object = prop.get(2);
    AnnotatedPhrase rel = prop.get(1);
    TokenSequenceMatcher matcher = TokenSequencePattern
            .compile(REGEX.T_NP_POS_NP)
            .getMatcher(object.getWordCoreLabelList());

    int posIndex = -1;

    while (matcher.find()){
        List<CoreMap> matchedTokens = matcher.groupNodes();

        // The match has to stretch from the first to the last word of the object
        CoreLabel matchStart = new CoreLabel(matchedTokens.get(0));
        CoreLabel matchEnd = new CoreLabel(matchedTokens.get(matchedTokens.size() - 1));
        boolean spansWholeObject =
                matchStart.index() == object.getWordList().get(0).index()
                && matchEnd.index() == object.getWordList().get(object.getWordList().size() - 1).index();
        if (!spansWholeObject){
            break;
        }

        // Locate the first possessive marker that is not part of a named entity
        for (CoreMap token: matchedTokens){
            CoreLabel label = new CoreLabel(token);
            boolean plainPossessive = label.tag().equals(POS_TAG.POS) && label.ner().equals(NE_TYPE.NO_NER);
            if (plainPossessive){
                posIndex = object.getWordCoreLabelList().indexOf(label);
                break;
            }
        }
    }

    // No possessive marker found: leave the proposition untouched
    if (posIndex < 0){
        return;
    }

    // Drop the marker, then collect everything that came after it
    object.removeWordFromList(posIndex);
    ObjectArrayList<IndexedWord> pushedWords = new ObjectArrayList<>();
    int cursor = posIndex;
    while (cursor < object.getWordList().size()){
        pushedWords.add(object.getWordList().get(cursor));
        cursor++;
    }

    // Synthetic preposition closing the extended relation
    IndexedWord of = new IndexedWord();
    of.setOriginalText("of");
    of.setLemma("of");
    of.setWord("of");
    of.setTag("IN");
    of.setNER("O");
    of.setIndex(-1);

    rel.addWordsToList(pushedWords);
    rel.addWordToList(of);
    object.removeWordsFromList(pushedWords);
}
 
开发者ID:gkiril,项目名称:minie,代码行数:60,代码来源:MinIE.java


注:本文中的edu.stanford.nlp.ling.tokensregex.TokenSequencePattern.getMatcher方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。