當前位置: 首頁>>代碼示例>>Java>>正文


Java CoreLabel.lemma方法代碼示例

本文整理匯總了Java中edu.stanford.nlp.ling.CoreLabel.lemma方法的典型用法代碼示例。如果您正苦於以下問題:Java CoreLabel.lemma方法的具體用法?Java CoreLabel.lemma怎麼用?Java CoreLabel.lemma使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類edu.stanford.nlp.ling.CoreLabel的用法示例。


在下文中一共展示了CoreLabel.lemma方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: tokenize

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Tokenizes {@code text} and collects the representative form of every token that survives
 * stop-word filtering.
 *
 * <p>The text is annotated with {@code pipelineTokens}. For each token of each sentence, the
 * lemma is skipped if {@code stopwords} contains it; otherwise it is mapped through
 * {@code representativeProvider.getRepresentative(lemma, pos)} and the result is kept unless it
 * is itself contained in {@code stopwords}.
 *
 * @param text the text to tokenize; may be {@code null}
 * @return the retained representatives in iteration order; empty when {@code text} is
 *         {@code null}
 */
public LinkedList<String> tokenize(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text == null) {
        return res;
    }

    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentenceTokens : qssTokens) {
        // TokensAnnotation is typed as List<CoreLabel>; downcasting it to ArrayList would throw
        // ClassCastException whenever the annotator supplies a different List implementation.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            if (stopwords.contains(lemma)) {
                continue;
            }
            String rep = representativeProvider.getRepresentative(lemma, pos);
            // The representative can differ from the lemma, so it is filtered again.
            if (!stopwords.contains(rep)) {
                res.add(rep);
            }
        }
    }
    return res;
}
 
開發者ID:UCLA-BD2K,項目名稱:aztec-text-analysis-tools,代碼行數:23,代碼來源:Tokenizer.java

示例2: namedEntityDictionaryMinimization

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Dictionary-based minimization around the named entities of the phrase.
 *
 * <p>Every match of {@code REGEX.T_RB_JJ_NER} over the phrase's word list is scanned: words that
 * {@code CoreNLPUtils} classifies as adjective or adverb and whose NER tag equals
 * {@code NE_TYPE.NO_NER} are added to {@code remWords}, then
 * {@code dropWordsNotFoundInDict(matchWords, remWords)} is invoked for that match. After all
 * matches have been processed, {@code namedEntitySafeMinimization(remWords, matchWords)} runs.
 *
 * <p>NOTE(review): the original comments spoke of dropping determiners (DT), but the code selects
 * adjectives/adverbs only -- confirm which one is intended.
 *
 * @param remWords   list receiving the words selected for removal
 * @param matchWords working list; it is overwritten with the nodes of each match, so the value
 *                   passed in is only forwarded when the pattern never matches
 */
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords) {
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()) {
        matchWords = tMatcher.groupNodes();

        for (CoreMap node : matchWords) {
            // Work on a copy of the node, with the lemma defaulted to the surface form.
            CoreLabel label = new CoreLabel(node);
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            // Only adjectives and adverbs are candidates ...
            boolean isModifier = CoreNLPUtils.isAdj(label.tag()) || CoreNLPUtils.isAdverb(label.tag());
            if (!isModifier) {
                continue;
            }
            // ... and only when they are not part of a named entity.
            if (label.ner().equals(NE_TYPE.NO_NER)) {
                remWords.add(node);
            }
        }

        // Dictionary lookup decides what is dropped for this match.
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }

    // Finish with the safe minimization.
    this.namedEntitySafeMinimization(remWords, matchWords);
}
 
開發者ID:gkiril,項目名稱:minie,代碼行數:27,代碼來源:Minimization.java

示例3: addingContentWord

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Annotates a content word with a difficulty level and updates the per-level word counters.
 *
 * <p>The token starts at level 4. The lemma is then looked up in the model's level-3, level-2
 * and level-1 lemma collections, in that order; each hit overwrites the annotation, so the
 * lowest matching level is the one that remains. Every collection containing the lemma
 * increments its own counter, not only the lowest one.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    // Default: hardest level, used when the lemma is in none of the collections.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    final String tokenLemma = token.lemma();

    if (model.getLevel3Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
        level3WordSize++;
    }

    if (model.getLevel2Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
        level2WordSize++;
    }

    if (model.getLevel1Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
        level1WordSize++;
    }
}
 
開發者ID:dhfbk,項目名稱:tint,代碼行數:19,代碼來源:GalicianReadability.java

示例4: simpleTokenization

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Tokenizes {@code text} and collects the lemmas of tokens whose part-of-speech tag starts with
 * {@code "N"} or {@code "V"} and whose lemma is not contained in {@code stopwords}.
 *
 * <p>The text is annotated with {@code pipelineTokens}; no representative mapping is applied.
 *
 * @param text the text to tokenize; may be {@code null}
 * @return the retained lemmas in iteration order; empty when {@code text} is {@code null}
 */
public LinkedList<String> simpleTokenization(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text == null) {
        return res;
    }

    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentenceTokens : qssTokens) {
        // TokensAnnotation is typed as List<CoreLabel>; downcasting it to ArrayList would throw
        // ClassCastException whenever the annotator supplies a different List implementation.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            boolean nounOrVerb = pos.startsWith("N") || pos.startsWith("V");
            if (nounOrVerb && !stopwords.contains(lemma)) {
                res.add(lemma);
            }
        }
    }
    return res;
}
 
開發者ID:UCLA-BD2K,項目名稱:aztec-text-analysis-tools,代碼行數:20,代碼來源:Tokenizer.java

示例5: verbPhraseSafeMinimization

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Safe minimization of verb phrases.
 *
 * <p>Every match of {@code REGEX.T_RB_VB} over the phrase's word list is scanned: a word is added
 * to {@code remWords} when {@code CoreNLPUtils.isAdverb} accepts its tag, its NER tag equals
 * {@code NE_TYPE.NO_NER}, and its lower-cased lemma is not in {@code Polarity.NEG_WORDS}. After
 * each match, {@code dropWords(remWords, matchWords)} is invoked.
 *
 * @param remWords   list receiving the words selected for removal
 * @param matchWords working list; it is overwritten with the nodes of each match
 */
public void verbPhraseSafeMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_VB);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();

        for (CoreMap cm : matchWords){
            // Work on a copy of the node, with the lemma defaulted to the surface form.
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) {
                cl.setLemma(cl.word());
            }

            boolean isAdverb = CoreNLPUtils.isAdverb(cl.tag());
            boolean isNotNER = cl.ner().equals(NE_TYPE.NO_NER);
            // Locale.ROOT keeps the dictionary lookup independent of the JVM default locale
            // (e.g. under a Turkish locale "I" would lower-case to a dotless i).
            boolean containsNEG =
                    Polarity.NEG_WORDS.contains(cl.lemma().toLowerCase(java.util.Locale.ROOT));

            // Remove adverbs that are neither part of a named entity nor a negation word.
            if (isAdverb && isNotNER && !containsNEG){
                remWords.add(cm);
            }
        }
        this.dropWords(remWords, matchWords);
    }
}
 
開發者ID:gkiril,項目名稱:minie,代碼行數:30,代碼來源:Minimization.java

示例6: addingContentWord

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Annotates a content word with a difficulty level and updates the per-level word counters.
 *
 * <p>The token starts at level 4. The lemma is then looked up in the model's level-3, level-2
 * and level-1 lemma collections, in that order; each hit overwrites the annotation, so the
 * lowest matching level is the one that remains. Every collection containing the lemma
 * increments its own counter, not only the lowest one.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    // Default: hardest level, used when the lemma is in none of the collections.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    final String tokenLemma = token.lemma();

    if (model.getLevel3Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
        level3WordSize++;
    }

    if (model.getLevel2Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
        level2WordSize++;
    }

    if (model.getLevel1Lemmas().contains(tokenLemma)) {
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
        level1WordSize++;
    }
}
 
開發者ID:dhfbk,項目名稱:tint,代碼行數:38,代碼來源:SpanishReadability.java

示例7: tokenizeBySentence

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Tokenizes {@code text} sentence by sentence, returning one list of representatives per
 * sentence and recording each kept sentence's character span.
 *
 * <p>The text is annotated with {@code pipelineTokens}. Within a sentence, a token's lemma is
 * skipped if {@code stopwords} contains it; otherwise it is mapped through
 * {@code representativeProvider.getRepresentative(lemma, pos)} and kept unless the result is
 * contained in {@code stopwords}. Sentences that end up with no representatives are omitted from
 * both the result and {@code boundaries}, so the two stay index-aligned for the entries added
 * by this call.
 *
 * @param text       the text to tokenize; may be {@code null}
 * @param boundaries output list; for every kept sentence a two-element array
 *                   {@code {beginPosition of first token, endPosition of last token}} is appended
 * @return the per-sentence representative lists; empty when {@code text} is {@code null}
 */
public LinkedList<LinkedList<String>> tokenizeBySentence(String text, LinkedList<int[]> boundaries) {
    LinkedList<LinkedList<String>> res = new LinkedList<>();
    if (text == null) {
        return res;
    }

    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap sentenceTokens : qssTokens) {
        // TokensAnnotation is typed as List<CoreLabel>; downcasting it to ArrayList would throw
        // ClassCastException whenever the annotator supplies a different List implementation.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens.isEmpty()) {
            // A sentence without tokens has no span; previously this dereferenced a null
            // "last token" and threw NullPointerException.
            continue;
        }

        LinkedList<String> sentence = new LinkedList<>();
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            if (stopwords.contains(lemma)) {
                continue;
            }
            String rep = representativeProvider.getRepresentative(lemma, pos);
            // The representative can differ from the lemma, so it is filtered again.
            if (!stopwords.contains(rep)) {
                sentence.add(rep);
            }
        }

        if (!sentence.isEmpty()) {
            // The span covers the whole sentence, including tokens that were filtered out.
            int[] bounds = {
                tokens.get(0).beginPosition(),
                tokens.get(tokens.size() - 1).endPosition()
            };
            res.add(sentence);
            boundaries.add(bounds);
        }
    }
    return res;
}
 
開發者ID:UCLA-BD2K,項目名稱:aztec-text-analysis-tools,代碼行數:37,代碼來源:Tokenizer.java

示例8: removeVerbsBeforeNouns

import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Given a phrase, if there is (DT* VB+ NN+), remove (DT* VB+).
 *
 * <p>Every match of {@code REGEX.T_DT_VB_NN_END} over the phrase's word list is scanned. Words
 * that are part of a named entity (NER tag different from {@code NE_TYPE.NO_NER}) or whose
 * lower-cased lemma is in {@code Polarity.NEG_WORDS} are never selected. Otherwise:
 * <ul>
 *   <li>a determiner is selected only if it is the first word of the phrase; a determiner
 *       elsewhere stops the scan of the current match;</li>
 *   <li>a verb is selected only if {@code sg.hasChildren} reports no children for it; when no
 *       determiner has been selected so far, the verb must additionally be the first word of
 *       the phrase, otherwise the scan of the current match stops.</li>
 * </ul>
 * If {@code isCoreMapListInDictionary(matchWords)} holds, both lists are cleared and nothing is
 * dropped for that match; otherwise {@code dropWords(remWords, matchWords)} is invoked.
 *
 * @param remWords   list receiving the words selected for removal
 * @param matchWords working list; it is overwritten with the nodes of each match
 */
public void removeVerbsBeforeNouns(List<CoreMap> remWords, List<CoreMap> matchWords){
    // NOTE(review): this flag is never reset between matches, so a determiner selected in an
    // earlier match still counts for later ones -- confirm this is intended.
    boolean hasDT = false;

    this.tPattern = TokenSequencePattern.compile(REGEX.T_DT_VB_NN_END);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();

        for (CoreMap cm : matchWords){
            // Work on a copy of the node, with the lemma defaulted to the surface form.
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) {
                cl.setLemma(cl.word());
            }

            boolean isDT = cl.tag().equals(POS_TAG.DT);
            boolean isNotNER = cl.ner().equals(NE_TYPE.NO_NER);
            // Locale.ROOT keeps the dictionary lookup independent of the JVM default locale
            // (e.g. under a Turkish locale "I" would lower-case to a dotless i).
            boolean containsNEG =
                    Polarity.NEG_WORDS.contains(cl.lemma().toLowerCase(java.util.Locale.ROOT));
            boolean isVerb = CoreNLPUtils.isVerb(cl.tag());
            boolean removable = isNotNER && !containsNEG;

            if (isDT && removable){
                // A determiner is only dropped when it opens the phrase.
                if (cl.index() != this.phrase.getWordCoreLabelList().get(0).index()){
                    break;
                }
                remWords.add(cm);
                hasDT = true;
            }
            else if (isVerb && removable){
                // Without a preceding determiner the verb itself must open the phrase.
                if (!hasDT && cl.index() != this.phrase.getWordCoreLabelList().get(0).index()){
                    break;
                }
                // Verbs that still have children in the semantic graph are kept.
                if (!this.sg.hasChildren(new IndexedWord(cl))){
                    remWords.add(cm);
                }
            }
        }

        // If the multi-word expression is found in the dictionary - don't drop it
        if (this.isCoreMapListInDictionary(matchWords)){
            matchWords.clear();
            remWords.clear();
            continue;
        }

        this.dropWords(remWords, matchWords);
    }
}
 
開發者ID:gkiril,項目名稱:minie,代碼行數:57,代碼來源:Minimization.java


注:本文中的edu.stanford.nlp.ling.CoreLabel.lemma方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。