当前位置: 首页>>代码示例>>Java>>正文


Java TokenSequencePattern类代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.tokensregex.TokenSequencePattern的典型用法代码示例。如果您正苦于以下问题：Java TokenSequencePattern类的具体用法？Java TokenSequencePattern怎么用？Java TokenSequencePattern使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


TokenSequencePattern类属于edu.stanford.nlp.ling.tokensregex包，在下文中一共展示了TokenSequencePattern类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: namedEntityDictionaryMinimization

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Dictionary-based minimization around the named entities of the phrase.
 *
 * Every match of REGEX.T_RB_JJ_NER in the phrase is scanned: words tagged as adjective or adverb
 * (per CoreNLPUtils.isAdj / CoreNLPUtils.isAdverb) whose NER equals NE_TYPE.NO_NER are collected
 * in remWords, and the match is then passed to dropWordsNotFoundInDict. After the last match the
 * safe minimization is applied.
 *
 * @param remWords   list collecting the words which are candidates for removal
 * @param matchWords list of matched words; it is overwritten with the nodes of every match
 */
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = this.tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()){
        matchWords = this.tMatcher.groupNodes();

        for (CoreMap token: matchWords){
            CoreLabel label = new CoreLabel(token);
            // Fall back to the surface form when the (copied) label has no lemma
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            // Candidate for removal: an adjective/adverb which is not part of a named entity
            boolean isModifier = CoreNLPUtils.isAdj(label.tag()) || CoreNLPUtils.isAdverb(label.tag());
            if (isModifier && label.ner().equals(NE_TYPE.NO_NER)){
                remWords.add(token);
            }
        }

        // Let the dictionary decide which of the collected words are actually dropped
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }

    // Finish with the safe minimization
    this.namedEntitySafeMinimization(remWords, matchWords);
}
 
开发者ID:gkiril,项目名称:minie,代码行数:27,代码来源:Minimization.java

示例2: IntelKBPTokensregexExtractor

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Creates one TokensRegex extractor per relation type from the rule files found in a directory.
 *
 * For every relation the file "<canonical name with '/' replaced by SLASH>.rules" is looked up via
 * IOUtils.existsInClasspathOrFileSystem; when present, an extractor is built from "defs.rules" plus
 * that file and registered in the rules map. PER_FORMER_TITLE is only considered when
 * IntelConfig.bSeprateFormerTitle is set.
 *
 * @param tokensregexDir directory holding defs.rules and the per-relation rule files
 */
public IntelKBPTokensregexExtractor(String tokensregexDir) {
    logger.log("Creating TokensRegexExtractor");
    // Shared definitions file, prepended to the rule list of every relation
    final String defsFile = tokensregexDir + File.separator + "defs.rules";

    for (RelationType rel : RelationType.values()) {
        // The former-title relation gets no extractor of its own unless configured
        if (!IntelConfig.bSeprateFormerTitle && rel == RelationType.PER_FORMER_TITLE) {
            continue;
        }

        final String ruleFile = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
        if (!IOUtils.existsInClasspathOrFileSystem(ruleFile)) {
            continue;
        }

        List<String> ruleFiles = new ArrayList<>();
        ruleFiles.add(defsFile);
        ruleFiles.add(ruleFile);
        logger.log("Rule files for relation " + rel + " is " + ruleFile);

        Env env = TokenSequencePattern.getNewEnv();
        env.bind("collapseExtractionRules", true);
        CoreMapExpressionExtractor extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, ruleFiles).keepTemporaryTags();
        rules.put(rel, extractor);
    }
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:20,代码来源:IntelKBPTokensregexExtractor.java

示例3: KBPTokensregexExtractor

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Creates one TokensRegex extractor per relation type from the rule files found in a directory.
 *
 * For every relation the file "<canonical name with '/' replaced by SLASH>.rules" is looked up via
 * IOUtils.existsInClasspathOrFileSystem; when present, an extractor is built from "defs.rules" plus
 * that file and registered in the rules map.
 *
 * @param tokensregexDir directory holding defs.rules and the per-relation rule files
 */
public KBPTokensregexExtractor(String tokensregexDir) {
  logger.log("Creating TokensRegexExtractor");
  // Shared definitions file, prepended to the rule list of every relation
  final String defsFile = tokensregexDir + File.separator + "defs.rules";

  for (RelationType rel : RelationType.values()) {
    final String ruleFile = tokensregexDir + File.separator + rel.canonicalName.replaceAll("/", "SLASH") + ".rules";
    if (!IOUtils.existsInClasspathOrFileSystem(ruleFile)) {
      continue;
    }

    List<String> ruleFiles = new ArrayList<>();
    ruleFiles.add(defsFile);
    ruleFiles.add(ruleFile);
    logger.log("Rule files for relation " + rel + " is " + ruleFile);

    Env env = TokenSequencePattern.getNewEnv();
    env.bind("collapseExtractionRules", true);
    CoreMapExpressionExtractor extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, ruleFiles).keepTemporaryTags();
    rules.put(rel, extractor);
  }
}
 
开发者ID:intel-analytics,项目名称:InformationExtraction,代码行数:18,代码来源:KBPTokensregexExtractor.java

示例4: TokensRegexAnnotator

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Configures the annotator from properties.
 *
 * All property names are prefixed with "<name>." (no prefix when name is null). The "rules"
 * property lists the rule files; "verbose", "setTokenOffsets", "extractWithTokens" and "flatten"
 * override the corresponding defaults, and "matchedExpressionsAnnotationKey" optionally names the
 * annotation key resolved through EnvLookup.
 *
 * @param name  annotator name used as property prefix, may be null
 * @param props properties to read the configuration from
 * @throws RuntimeException if no rule file is configured or the annotation key cannot be resolved
 */
public TokensRegexAnnotator(String name, Properties props) {
  String prefix;
  if (name == null) {
    prefix = "";
  } else {
    prefix = name + ".";
  }

  String[] files = PropertiesUtils.getStringArray(props, prefix + "rules");
  if (files == null || files.length == 0) {
    throw new RuntimeException("No rules specified for TokensRegexAnnotator " + name + ", check " + prefix + "rules property");
  }

  // The options object is bound before the rules are loaded
  env = TokenSequencePattern.getNewEnv();
  env.bind("options", options);
  extractor = CoreMapExpressionExtractor.createExtractorFromFiles(env, files);

  verbose = PropertiesUtils.getBool(props, prefix + "verbose", verbose);
  options.setTokenOffsets = PropertiesUtils.getBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
  options.extractWithTokens = PropertiesUtils.getBool(props, prefix + "extractWithTokens", options.extractWithTokens);
  options.flatten = PropertiesUtils.getBool(props, prefix + "flatten", options.flatten);

  // Optional: annotation key under which the matched expressions are stored
  String keyProperty = prefix + "matchedExpressionsAnnotationKey";
  String keyName = props.getProperty(keyProperty);
  if (keyName == null) {
    return;
  }
  options.matchedExpressionsAnnotationKey = EnvLookup.lookupAnnotationKey(env, keyName);
  if (options.matchedExpressionsAnnotationKey == null) {
    throw new RuntimeException("Cannot determine annotation key for " + keyProperty + "=" + keyName);
  }
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:23,代码来源:TokensRegexAnnotator.java

示例5: pushInfinitiveVerb

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Checks whether the object phrase starts with a match of REGEX.T_TO_VB_NP_NER, i.e. whether a
 * match of that pattern begins at the very first word of the object.
 *
 * @param object the object phrase
 * @return true if some match starts at the first word of the object, false otherwise
 */
public boolean pushInfinitiveVerb(Phrase object){
    TokenSequenceMatcher matcher = TokenSequencePattern.compile(REGEX.T_TO_VB_NP_NER)
                                                       .getMatcher(object.getWordCoreLabelList());

    while (matcher.find()){
        // Compare token indices of the match start and the phrase start
        int matchStart = new CoreLabel(matcher.groupNodes().get(0)).index();
        int phraseStart = object.getWordList().get(0).index();
        if (matchStart == phraseStart){
            return true;
        }
    }

    return false;
}
 
开发者ID:gkiril,项目名称:minie,代码行数:20,代码来源:MinIE.java

示例6: pushAdverb

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Checks whether the object phrase starts with a match of REGEX.T_RB_OPT_IN_TO_OPT (adverb(s)
 * followed by a preposition or "to") while its first word is not part of a named entity. In that
 * case the adverb(s) should be pushed to the relation.
 *
 * @param object a phrase, the object of the proposition
 * @return true if a match starts at the first word of the object and that word has no NER type
 */
public boolean pushAdverb(Phrase object){
    TokenSequenceMatcher matcher = TokenSequencePattern.compile(REGEX.T_RB_OPT_IN_TO_OPT)
                                                       .getMatcher(object.getWordCoreLabelList());

    while (matcher.find()){
        CoreLabel matchStart = new CoreLabel(matcher.groupNodes().get(0));

        // Only matches anchored at the first word of the object are of interest
        if (matchStart.index() != object.getWordList().get(0).index()){
            continue;
        }
        if (object.getWordList().get(0).ner().equals(NE_TYPE.NO_NER)){
            return true;
        }
    }

    return false;
}
 
开发者ID:gkiril,项目名称:minie,代码行数:19,代码来源:MinIE.java

示例7: generateSequentialPatternExtractions

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Generates extractions from the token-regex pattern REGEX.T_ORG_IN_LOC.
 *
 * For every match in the sentence one proposition is added: the ORGANIZATION words form the
 * subject, the LOCATION words form the object, and the relation is the is-a relation (installed by
 * setIsARelation) extended with the non-NER prepositions of the match.
 */
public void generateSequentialPatternExtractions() {
    // Scratch list holding subject/relation/object of one proposition; emptied after every match
    ObjectArrayList<AnnotatedPhrase> triple = new ObjectArrayList<>();

    this.tPattern = TokenSequencePattern.compile(REGEX.T_ORG_IN_LOC);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));

    while (this.tMatcher.find()){
        this.setIsARelation();

        // Distribute the matched words over subject, object and relation
        for (IndexedWord word: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) {
            String ner = word.ner();
            if (ner.equals(NE_TYPE.ORGANIZATION)) {
                this.subj.addWordToList(word);
            } else if (ner.equals(NE_TYPE.LOCATION)) {
                this.obj.addWordToList(word);
            } else if (ner.equals(NE_TYPE.NO_NER) && word.tag().equals(POS_TAG.IN)) {
                this.rel.addWordToList(word);
            }
        }

        IndexedWord subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        IndexedWord objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());

        // The word lists are cloned since subj/rel/obj are reused for the next match
        triple.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        triple.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
        triple.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
        this.propositions.add(new AnnotatedProposition(triple.clone(), new Attribution()));

        // Reset the reusable state
        triple.clear();
        this.subj.clear();
        this.rel.clear();
        this.obj.clear();
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:37,代码来源:ImplicitExtractions.java

示例8: extractCityOfLocation

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * If "city|town of LOCATION" => "LOCATION" "is" "city|town".
 *
 * For every match of REGEX.T_CITY_OF_LOC in the sentence one is-a proposition is added: the
 * LOCATION words become the subject, the remaining non-preposition words become the object, and
 * prepositions (tag IN) are left out.
 */
public void extractCityOfLocation() {
    // Scratch list holding subject/relation/object of one proposition; emptied after every match
    ObjectArrayList<AnnotatedPhrase> triple = new ObjectArrayList<>();

    // The relation is the same "is-a" relation for all matches
    this.setIsARelation();

    this.tPattern = TokenSequencePattern.compile(REGEX.T_CITY_OF_LOC);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));

    while (this.tMatcher.find()){
        ObjectArrayList<IndexedWord> matchedWords =
                CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes());

        for (IndexedWord word: matchedWords) {
            boolean isLocation = word.ner().equals(NE_TYPE.LOCATION);
            boolean isPreposition = word.tag().equals(POS_TAG.IN);

            // Prepositions belong neither to the subject nor to the object
            if (isPreposition) {
                continue;
            }
            if (isLocation) {
                this.subj.addWordToList(word);
            } else {
                this.obj.addWordToList(word);
            }
        }

        // Assemble the proposition from copies of the current subject/relation/object
        IndexedWord subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        IndexedWord objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());
        triple.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        triple.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
        triple.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
        this.propositions.add(new AnnotatedProposition(triple.clone(), new Attribution()));

        // Reset the per-match state (the relation is kept for the next match)
        triple.clear();
        this.subj.clear();
        this.obj.clear();
    }

    // The relation is only discarded once all matches are processed
    this.rel.clear();
}
 
开发者ID:gkiril,项目名称:minie,代码行数:41,代码来源:ImplicitExtractions.java

示例9: verbPhraseSafeMinimization

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Safe minimization of verb phrases.
 *
 * For every match of REGEX.T_RB_VB in the phrase, the adverbs which are not part of a named entity
 * and whose lower-cased lemma is not listed in Polarity.NEG_WORDS are collected in remWords; the
 * collected words and the match are then passed to dropWords.
 *
 * @param remWords   list collecting the words which are candidates for removal
 * @param matchWords list of matched words; it is overwritten with the nodes of every match
 */
public void verbPhraseSafeMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_VB);
    this.tMatcher = this.tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()){
        matchWords = this.tMatcher.groupNodes();

        for (CoreMap token: matchWords){
            CoreLabel label = new CoreLabel(token);
            // Fall back to the surface form when the (copied) label has no lemma
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            boolean adverb = CoreNLPUtils.isAdverb(label.tag());
            boolean outsideNamedEntity = label.ner().equals(NE_TYPE.NO_NER);
            boolean negation = Polarity.NEG_WORDS.contains(label.lemma().toLowerCase());

            // Keep everything which is not a plain (non-NER, non-negating) adverb
            if (!adverb || !outsideNamedEntity || negation){
                continue;
            }
            remWords.add(token);
        }

        this.dropWords(remWords, matchWords);
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:30,代码来源:Minimization.java

示例10: generateNounPhraseFreqCandidates

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Generates candidates for each noun phrase within the phrase: the phrase is matched against
 * REGEX.T_DT_RB_JJ_PR_NN and the resulting matcher is handed, together with a SubConstituent built
 * on this.sg, to generateCandidatesFromTokenRegexMatch.
 */
public void generateNounPhraseFreqCandidates(){
    final SubConstituent subConstituent = new SubConstituent(this.sg);

    // Matcher over the noun-phrase pattern
    final TokenSequenceMatcher nounPhraseMatcher =
            TokenSequencePattern.compile(REGEX.T_DT_RB_JJ_PR_NN)
                                .getMatcher(this.phrase.getWordCoreLabelList());

    this.generateCandidatesFromTokenRegexMatch(nounPhraseMatcher, subConstituent);
}
 
开发者ID:gkiril,项目名称:minie,代码行数:10,代码来源:FrequencyCandidates.java

示例11: getNewEnv

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Creates a TokensRegex environment whose default string pattern flags are set to
 * Pattern.CASE_INSENSITIVE and which has been passed through initEnv.
 *
 * @return the newly created and initialized environment
 */
public static Env getNewEnv()
{
  final Env caseInsensitiveEnv = TokenSequencePattern.getNewEnv();

  // String patterns ignore case by default
  caseInsensitiveEnv.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE);

  initEnv(caseInsensitiveEnv);
  return caseInsensitiveEnv;
}
 
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:11,代码来源:NumberNormalizer.java

示例12: detectQuantities

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Detect the quantities in a phrase (given the sentence semantic graph).
 *
 * Every match of the quantity pattern is processed as follows: the head word of the match is
 * determined; matches whose head is a DATE, LOCATION, MISC, ORGANIZATION, PERSON or TIME entity
 * are skipped. Otherwise the matched words, the head's quantifier/adverbial modifiers and the
 * words of the subtrees of these modifiers (restricted to words of this phrase) are registered as
 * one quantity via setQuantitiesFromWordList.
 *
 * @param sentSemGraph: the sentence semantic graph
 * @param i: selects the pattern (1 => REGEX.QUANTITY_SEQUENCE, otherwise
 *           REGEX.QUANTITY_SEQUENCE_WITH_NO); also forwarded to setQuantitiesFromWordList
 */
public void detectQuantities(SemanticGraph sentSemGraph, int i){
    // Quantity edges handed over with every detected quantity (never filled in this method)
    ObjectArrayList<SemanticGraphEdge> qEdges = new ObjectArrayList<SemanticGraphEdge>();
    
    // Tokens regex patterns
    String tokenRegexPattern;
    if (i == 1)
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE;
    else
        tokenRegexPattern = REGEX.QUANTITY_SEQUENCE_WITH_NO;
    
    TokenSequencePattern tPattern = TokenSequencePattern.compile(tokenRegexPattern);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());
    
    // Reusable set of the words of the current quantity; emptied after every registered quantity
    ObjectOpenHashSet<IndexedWord> wordsSet = new ObjectOpenHashSet<>();
    int matchCounter = -1;
    
    // Annotate the matches and their subtrees
    while (tMatcher.find()){      
        matchCounter++;
        List<CoreMap> matchCoreMaps = tMatcher.groupNodes();
        
        // Get the head word of the phrase and see whether or not to add it to the quantities
        IndexedWord head = CoreNLPUtils.getRootFromCoreMapWordList(sentSemGraph, matchCoreMaps);
        if (head.ner().equals(NE_TYPE.DATE) || head.ner().equals(NE_TYPE.LOCATION) ||
                head.ner().equals(NE_TYPE.MISC) || head.ner().equals(NE_TYPE.ORGANIZATION) || 
                head.ner().equals(NE_TYPE.PERSON) || head.ner().equals(NE_TYPE.TIME))
            continue;
        
        // Subtree edges of THIS match only. The set is created per match so that edges found for a
        // previous match cannot leak into the current quantity, and the edges of every qualifying
        // modifier are accumulated instead of keeping only those of the last one.
        Set<SemanticGraphEdge> subtreeEdges = new HashSet<>();
        for (IndexedWord w: sentSemGraph.getChildren(head)){
            if ((sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.QUANTIFIER_MODIFIER) ||
                (sentSemGraph.reln(head, w) == EnglishGrammaticalRelations.ADVERBIAL_MODIFIER)){
                wordsSet.add(w);
                Set<SemanticGraphEdge> modifierEdges = CoreNLPUtils.getSubTreeEdges(w, sentSemGraph, null);
                if (modifierEdges != null)
                    subtreeEdges.addAll(modifierEdges);
            }
        }
        
        // Add the quantity words found and annotate them within the phrase
        wordsSet.addAll(CoreNLPUtils.getWordSetFromCoreMapList(matchCoreMaps));
        wordsSet.addAll(CoreNLPUtils.getSortedWordsFromListOfEdges(subtreeEdges));
        // Only words which belong to this phrase can be part of the quantity
        wordsSet.retainAll(this.getWordList());
        ObjectArrayList<IndexedWord> qWords = CoreNLPUtils.getSortedWordsFromSetOfWords(wordsSet);
        if (qWords.isEmpty())
            continue;
        this.setQuantitiesFromWordList(qWords.clone(), qEdges, sentSemGraph, i, matchCounter);
        
        // Reset
        qWords.clear();
        wordsSet.clear();
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:62,代码来源:AnnotatedPhrase.java

示例13: mergeAdjacentQuantities

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * When there are already annotated quantities, merge the ones which are right next to each other in a sequence.
 *
 * For every match of REGEX.ADJACENT_QUANTITIES the words and edges of all quantities referenced by
 * the match (plus a preposition bridging them) are collected and assigned to the first referenced
 * quantity; the remaining quantities are removed, and their placeholder words as well as the
 * bridging preposition are removed from the word list of the phrase.
 */
public void mergeAdjacentQuantities(){
    // Reusable variables (emptied after every match)
    ObjectArrayList<String> qIds = new ObjectArrayList<>();
    ObjectOpenHashSet<IndexedWord> remWords = new ObjectOpenHashSet<>();
    
    // Token regex pattern and matcher
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.ADJACENT_QUANTITIES);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(this.getWordCoreLabelList());
    
    // Merge the quantities when matched
    while (tMatcher.find()){
        // Fresh lists per match: they are handed to the surviving quantity through
        // setWords/setEdges, so reusing them would carry words and edges of an earlier merge
        // over into the next one.
        ObjectArrayList<IndexedWord> mergedQuantityWords = new ObjectArrayList<>();
        ObjectArrayList<SemanticGraphEdge> mergedEdges = new ObjectArrayList<>();
        
        // Get the merged words and edges from the quantities that should be merged.
        ObjectArrayList<IndexedWord> matches = CoreNLPUtils.getWordListFromCoreMapList(tMatcher.groupNodes());
        
        for (int i = 0; i < matches.size(); i++){
            // If it has preposition bridging two quantities, add it to the mergedQuantityWords list
            // (the preposition is the word just tested, i.e. the one at position i)
            if (matches.get(i).tag().equals(POS_TAG.IN)) {
                mergedQuantityWords.add(matches.get(i));
                remWords.add(matches.get(i));
            }
            
            // Merge the adjacent quantities
            for (Quantity q: this.getQuantities()){
                if ((Quantity.ST_QUANT + CHARACTER.UNDERSCORE + q.getId()).equals(matches.get(i).word())){
                    qIds.add(q.getId());
                    mergedQuantityWords.addAll(q.getQuantityWords());
                    mergedEdges.addAll(q.getQuantityEdges());
                }
            }
        }
        
        // Nothing to merge when no word of the match refers to a known quantity; without this
        // guard qIds.get(0) below fails on the empty list
        if (qIds.isEmpty()){
            remWords.clear();
            continue;
        }
        
        // Add all the words and edges from the merged quantities to the first one and remove the rest
        for (int i = 0; i < this.getWordList().size(); i++){
            if (this.getWordList().get(i).word().equals(Quantity.ST_QUANT + CHARACTER.UNDERSCORE + qIds.get(0))){
                if (this.getQuantityByID(qIds.get(0)) != null){
                    this.getQuantityByID(qIds.get(0)).setWords(mergedQuantityWords);
                    this.getQuantityByID(qIds.get(0)).setEdges(mergedEdges);
                    for (int j = 1; j < qIds.size(); j++){
                        this.removeQuantityByID(qIds.get(j));
                        // Schedule the placeholder word(s) of the removed quantity for removal
                        for (int k = i; k < this.getWordList().size(); k++){
                            if (this.getWordList().get(k).word().equals(Quantity.ST_QUANT + CHARACTER.UNDERSCORE + 
                                                                        qIds.get(j))){
                                remWords.add(this.getWordList().get(k));
                            }
                        }
                    }
                    break;
                }
            }
        }
        
        // Remove and clear 
        this.removeWordsFromList(remWords);
        remWords.clear();
        qIds.clear();
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:65,代码来源:AnnotatedPhrase.java

示例14: processPoss

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * Process possessives in the object.
 *
 * If the whole object matches REGEX.T_NP_POS_NP (noun phrase, possessive marker, noun phrase) and
 * the possessive marker is not part of a named entity, the marker is removed from the object, the
 * words which followed it are moved to the relation together with a synthetic "of", and the object
 * keeps the words which preceded the marker.
 *
 * @param prop: proposition (list of annotated phrases: subject, relation, object)
 */
public void processPoss(ObjectArrayList<AnnotatedPhrase> prop){
    // If there's no object (clause type SV), return
    if (prop.size() < 3)
        return;
    
    AnnotatedPhrase object = prop.get(2);
    AnnotatedPhrase rel = prop.get(1);
    TokenSequencePattern tPattern = TokenSequencePattern.compile(REGEX.T_NP_POS_NP);
    TokenSequenceMatcher tMatcher = tPattern.getMatcher(object.getWordCoreLabelList());
    
    // Position of the possessive marker within the object, -1 if there is none
    int posIndex = -1;
    
    while (tMatcher.find()){         
        List<CoreMap> match = tMatcher.groupNodes();
        
        // Check if the first/last word of the match is the first/last word of the object
        CoreLabel firstWord = new CoreLabel(match.get(0));
        CoreLabel lastWord = new CoreLabel(match.get(match.size() - 1));
        boolean check = false;
        if (firstWord.index() == object.getWordList().get(0).index()){
            if (lastWord.index() == object.getWordList().get(object.getWordList().size() - 1).index()){
                check = true;
            }
        }
        if (!check) break;
        
        // Locate the first possessive marker which is not part of a named entity
        for (CoreMap cm: match){
            CoreLabel cl = new CoreLabel(cm);
            if (cl.tag().equals(POS_TAG.POS) && (cl.ner().equals(NE_TYPE.NO_NER))){
                posIndex = object.getWordCoreLabelList().indexOf(cl);
                break;
            }
        }
    }
    
    if (posIndex > -1){
        IndexedWord of = new IndexedWord();
        // The word must be set BEFORE the lemma: changing the word of a CoreLabel removes an
        // already present lemma annotation, which would leave "of" without a lemma.
        of.setWord("of");
        of.setOriginalText("of");
        of.setLemma("of");
        of.setTag("IN");
        of.setNER("O");
        of.setIndex(-1);
        
        ObjectArrayList<IndexedWord> pushedWords = new ObjectArrayList<>();
        // Drop the possessive marker; the words now starting at posIndex are the ones after it
        object.removeWordFromList(posIndex);
        for (int i = posIndex; i < object.getWordList().size(); i++){
            pushedWords.add(object.getWordList().get(i));
        }
        rel.addWordsToList(pushedWords);
        rel.addWordToList(of);
        object.removeWordsFromList(pushedWords);
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:60,代码来源:MinIE.java

示例15: extractPersonIsNPOfOrg

import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern; //导入依赖的package包/类
/**
 * If ORG+ POS? NP PERSON+ => "PERSON" "is NP of" "ORG" (if there are , and or -> make multiple extractions).
 *
 * For every match of REGEX.T_ORG_NP_PERSON in the sentence: PERSON words form the subject(s) (a
 * comma, "and" or "or" closes the current subject and starts a new one), ORGANIZATION words form
 * the object, possessive markers are skipped and all other words extend the is-a relation, which
 * is finally completed with a synthetic "of". One proposition is added per subject of the match.
 */
public void extractPersonIsNPOfOrg() {
    // Reusable variables
    ObjectArrayList<AnnotatedPhrase> tempProp = new ObjectArrayList<>();
    ObjectArrayList<AnnotatedPhrase> subjects = new ObjectArrayList<>();
    IndexedWord subjRoot;
    IndexedWord objRoot;
    
    this.tPattern = TokenSequencePattern.compile(REGEX.T_ORG_NP_PERSON);
    this.tMatcher = this.tPattern.getMatcher(CoreNLPUtils.getCoreLabelListFromIndexedWordList(this.sentence));
    while (this.tMatcher.find()){    
        // Set the relation to be "is-a" relation
        this.setIsARelation();
        
        for (IndexedWord w: CoreNLPUtils.listOfCoreMapWordsToIndexedWordList(this.tMatcher.groupNodes())) {
            if (w.ner().equals(NE_TYPE.PERSON))
                this.subj.addWordToList(w);
            else if (w.ner().equals(NE_TYPE.ORGANIZATION))
                this.obj.addWordToList(w);
            else if (w.tag().equals(POS_TAG.POS))
                continue;
            else if (w.lemma().equals(CHARACTER.COMMA) || w.lemma().equals("and") || w.lemma().equals("or")) {
                // A separator closes the subject collected so far
                subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
                subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
                this.subj.clear();
            }
            else this.rel.addWordToList(w);
        }
        // The last subject of the match is not followed by a separator
        subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.subj.getWordList());
        subjects.add(new AnnotatedPhrase(this.subj.getWordList().clone(), subjRoot));
        objRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, this.obj.getWordList());
        
        // Synthetic "of" closing the relation ("is NP of")
        IndexedWord ofWord = new IndexedWord();
        ofWord.setWord("of");
        ofWord.setOriginalText("of");
        ofWord.setTag(POS_TAG.IN);
        ofWord.setNER(NE_TYPE.NO_NER);
        ofWord.setLemma("of");
        ofWord.setValue("of");
        ofWord.setIndex(-2);
        this.rel.addWordToList(ofWord);
        
        for (AnnotatedPhrase subject: subjects) {
            // Add the subj/rel/obj to the temporary proposition and then to the real propositions
            subjRoot = CoreNLPUtils.getRootFromWordList(this.sentenceSemGraph, subject.getWordList());
            tempProp.add(new AnnotatedPhrase(subject.getWordList(), subjRoot));
            tempProp.add(new AnnotatedPhrase(this.rel.getWordList().clone(), this.rel.getRoot()));
            tempProp.add(new AnnotatedPhrase(this.obj.getWordList().clone(), objRoot));
            this.propositions.add(new AnnotatedProposition(tempProp.clone(), new Attribution()));
            tempProp.clear();
        }
        
        // Clean the variables. The subjects must be discarded as well: otherwise the persons of
        // this match would be extracted again with the relation and organization of the next match.
        subjects.clear();
        this.subj.clear();
        this.obj.clear();
        this.rel.clear();
    }
}
 
开发者ID:gkiril,项目名称:minie,代码行数:59,代码来源:ImplicitExtractions.java


注:本文中的edu.stanford.nlp.ling.tokensregex.TokenSequencePattern类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。