本文整理匯總了Java中edu.stanford.nlp.ling.CoreLabel.lemma方法的典型用法代碼示例。如果您正苦於以下問題:Java CoreLabel.lemma方法的具體用法?Java CoreLabel.lemma怎麼用?Java CoreLabel.lemma使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類edu.stanford.nlp.ling.CoreLabel的用法示例。
在下文中一共展示了CoreLabel.lemma方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: tokenize
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Annotates {@code text} with {@code pipelineTokens} and collects the representative form of
 * every token that survives stopword filtering.
 *
 * <p>For each token the lemma is checked against {@code stopwords}; if it is not a stopword,
 * {@code representativeProvider.getRepresentative(lemma, pos)} is looked up and appended to the
 * result unless the representative itself is a stopword.
 *
 * @param text the raw text to process; {@code null} yields an empty list
 * @return the representatives in token order; never {@code null}
 */
public LinkedList<String> tokenize(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text == null) {
        return res;
    }
    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    if (qssTokens == null) {
        // The annotation carries no sentence list, so there is nothing to iterate over.
        return res;
    }
    for (CoreMap sentenceTokens : qssTokens) {
        // Work against the List interface: the annotation value is declared as List<CoreLabel>,
        // so downcasting to ArrayList could fail with a ClassCastException.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens == null) {
            continue;
        }
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            if (stopwords.contains(lemma)) {
                continue;
            }
            String rep = representativeProvider.getRepresentative(lemma, pos);
            if (!stopwords.contains(rep)) {
                res.add(rep);
            }
        }
    }
    return res;
}
示例2: namedEntityDictionaryMinimization
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Given a phrase, if it contains NERs, make a dictionary minimization around them.
 *
 * <p>For every match of {@code REGEX.T_RB_JJ_NER} over the phrase's tokens, each matched word
 * that is tagged as an adjective or adverb and whose NER label equals {@code NE_TYPE.NO_NER}
 * is appended to {@code remWords}; {@code dropWordsNotFoundInDict} is then invoked for that
 * match. Once no more matches are found, {@code namedEntitySafeMinimization} is run.
 *
 * @param remWords   accumulator for the words selected for removal
 * @param matchWords reassigned to the nodes of each match; whatever value it holds after the
 *                   last match is passed on to the safe minimization
 */
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    // NOTE(review): the original note described this pattern as
    // "(.* DT+ [RB|JJ]* NER+ .*) => drop (DT+)", but the loop below selects adjectives and
    // adverbs, not determiners - confirm against REGEX.T_RB_JJ_NER.
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());

    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();

        for (CoreMap matched : matchWords){
            // Work on a copy of the token; fall back to the surface form when no lemma is set.
            CoreLabel label = new CoreLabel(matched);
            if (label.lemma() == null) {
                label.setLemma(label.word());
            }

            boolean isModifier = CoreNLPUtils.isAdj(label.tag()) || CoreNLPUtils.isAdverb(label.tag());
            if (!isModifier) {
                continue;
            }
            // Only modifiers that are not part of a named entity are candidates for removal.
            if (label.ner().equals(NE_TYPE.NO_NER)) {
                remWords.add(matched);
            }
        }

        // Drop the words not found in dict.
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }

    // Finish with the safe minimization.
    this.namedEntitySafeMinimization(remWords, matchWords);
}
示例3: addingContentWord
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Delegates to the superclass, then annotates {@code token} with a difficulty level derived
 * from its lemma and updates the per-level counters.
 *
 * <p>The token starts at level 4. The model's level-3, level-2 and level-1 lemma collections
 * are then consulted in that order; every collection containing the lemma increments its
 * counter and overwrites the annotation, so the lowest matching level is the one that remains.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    // Default level, used when the lemma is in none of the model's lists.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    final String tokenLemma = token.lemma();

    // The checks are deliberately independent (no else): a lemma present in several lists
    // is counted in each of them, and the last assignment wins.
    if (model.getLevel3Lemmas().contains(tokenLemma)) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }

    if (model.getLevel2Lemmas().contains(tokenLemma)) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }

    if (model.getLevel1Lemmas().contains(tokenLemma)) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}
示例4: simpleTokenization
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Annotates {@code text} with {@code pipelineTokens} and returns the lemmas of all tokens whose
 * POS tag starts with {@code "N"} or {@code "V"} and whose lemma is not in {@code stopwords}.
 *
 * @param text the raw text to process; {@code null} yields an empty list
 * @return the selected lemmas in token order; never {@code null}
 */
public LinkedList<String> simpleTokenization(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text == null) {
        return res;
    }
    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    if (qssTokens == null) {
        // The annotation carries no sentence list, so there is nothing to iterate over.
        return res;
    }
    for (CoreMap sentenceTokens : qssTokens) {
        // Work against the List interface: the annotation value is declared as List<CoreLabel>,
        // so downcasting to ArrayList could fail with a ClassCastException.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens == null) {
            continue;
        }
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            // A token without a POS tag can be neither noun-like nor verb-like; skip it
            // instead of failing on pos.startsWith.
            if (pos == null) {
                continue;
            }
            boolean nounOrVerb = pos.startsWith("N") || pos.startsWith("V");
            if (nounOrVerb && !stopwords.contains(lemma)) {
                res.add(lemma);
            }
        }
    }
    return res;
}
示例5: verbPhraseSafeMinimization
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Given a phrase, if it contains a verb phrase, make a verb phrase safe minimization.
 *
 * <p>For every match of {@code REGEX.T_RB_VB} over the phrase's tokens, each matched word that
 * is an adverb, has the NER label {@code NE_TYPE.NO_NER} and whose lower-cased lemma is not in
 * {@code Polarity.NEG_WORDS} is appended to {@code remWords}; {@code dropWords} is then invoked
 * for that match.
 *
 * @param remWords   accumulator for the words selected for removal
 * @param matchWords reassigned to the nodes of each match
 */
public void verbPhraseSafeMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
    // If the relation starts with a RB+ VB+, drop RB+
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_VB);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();
        for (CoreMap cm: matchWords){
            // Work on a copy of the token; fall back to the surface form when no lemma is set.
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) {
                cl.setLemma(cl.word());
            }
            boolean isAdverb = CoreNLPUtils.isAdverb(cl.tag());
            boolean isNotNER = cl.ner().equals(NE_TYPE.NO_NER);
            // Lower-case with Locale.ROOT so the lookup does not depend on the JVM's default
            // locale (e.g. the Turkish dotless-i mapping would break matches containing 'I').
            boolean containsNEG = Polarity.NEG_WORDS.contains(cl.lemma().toLowerCase(java.util.Locale.ROOT));
            // Keep negation words and named entities; only plain adverbs are removed.
            if (isAdverb && isNotNER && !containsNEG){
                remWords.add(cm);
            }
        }
        this.dropWords(remWords, matchWords);
    }
}
示例6: addingContentWord
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Delegates to the superclass, then annotates {@code token} with a difficulty level derived
 * from its lemma and updates the per-level counters.
 *
 * <p>The token starts at level 4. The model's level-3, level-2 and level-1 lemma collections
 * are then consulted in that order; every collection containing the lemma increments its
 * counter and overwrites the annotation, so the lowest matching level is the one that remains.
 *
 * @param token the content word being added
 */
@Override
public void addingContentWord(CoreLabel token) {
    super.addingContentWord(token);

    // Default level, used when the lemma is in none of the model's lists.
    token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);

    final String tokenLemma = token.lemma();

    // The checks are deliberately independent (no else): a lemma present in several lists
    // is counted in each of them, and the last assignment wins.
    if (model.getLevel3Lemmas().contains(tokenLemma)) {
        level3WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
    }

    if (model.getLevel2Lemmas().contains(tokenLemma)) {
        level2WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
    }

    if (model.getLevel1Lemmas().contains(tokenLemma)) {
        level1WordSize++;
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
    }
}
示例7: tokenizeBySentence
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Annotates {@code text} with {@code pipelineTokens} and returns, per sentence, the
 * representative forms of the tokens that survive stopword filtering.
 *
 * <p>Sentences that end up with no representatives are omitted. For every sentence that is
 * kept, a two-element array {@code {begin of first token, end of last token}} is appended to
 * {@code boundaries}, so {@code boundaries} grows in step with the returned list.
 *
 * @param text       the raw text to process; {@code null} yields an empty list and leaves
 *                   {@code boundaries} untouched
 * @param boundaries output list receiving one {@code int[2]} per returned sentence
 * @return one list of representatives per non-empty sentence; never {@code null}
 */
public LinkedList<LinkedList<String>> tokenizeBySentence(String text, LinkedList<int[]> boundaries) {
    LinkedList<LinkedList<String>> res = new LinkedList<>();
    if (text == null) {
        return res;
    }
    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
    if (qssTokens == null) {
        // The annotation carries no sentence list, so there is nothing to iterate over.
        return res;
    }
    for (CoreMap sentenceTokens : qssTokens) {
        // Work against the List interface: the annotation value is declared as List<CoreLabel>,
        // so downcasting to ArrayList could fail with a ClassCastException.
        List<CoreLabel> tokens = sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        if (tokens == null || tokens.isEmpty()) {
            // Without tokens there is no last token to take the end offset from, and the
            // sentence could not contribute any representative anyway.
            continue;
        }
        LinkedList<String> sentence = new LinkedList<>();
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            String pos = t.tag();
            if (stopwords.contains(lemma)) {
                continue;
            }
            String rep = representativeProvider.getRepresentative(lemma, pos);
            if (!stopwords.contains(rep)) {
                sentence.add(rep);
            }
        }
        if (!sentence.isEmpty()) {
            // Boundaries span the whole sentence, including tokens that were filtered out.
            int[] bounds = new int[2];
            bounds[0] = tokens.get(0).beginPosition();
            bounds[1] = tokens.get(tokens.size() - 1).endPosition();
            res.add(sentence);
            boundaries.add(bounds);
        }
    }
    return res;
}
示例8: removeVerbsBeforeNouns
import edu.stanford.nlp.ling.CoreLabel; //導入方法依賴的package包/類
/**
 * Given a phrase, if there is (DT* VB+ NN+), remove (DT* VB+).
 *
 * <p>For every match of {@code REGEX.T_DT_VB_NN_END} over the phrase's tokens, the matched
 * words are scanned in order:
 * <ul>
 *   <li>a determiner that is not a named entity and not a negation word is added to
 *       {@code remWords} only if it is the first word of the phrase; otherwise scanning of the
 *       match stops;</li>
 *   <li>a verb that is not a named entity and not a negation word is added to
 *       {@code remWords} if it has no children in {@code sg}; when no determiner has been seen,
 *       the verb must additionally be the first word of the phrase, otherwise scanning of the
 *       match stops.</li>
 * </ul>
 * If {@code isCoreMapListInDictionary(matchWords)} holds, both lists are cleared and nothing is
 * dropped for that match; otherwise {@code dropWords} is invoked.
 *
 * @param remWords   accumulator for the words selected for removal
 * @param matchWords reassigned to the nodes of each match
 */
public void removeVerbsBeforeNouns(List<CoreMap> remWords, List<CoreMap> matchWords){
    // NOTE(review): hasDT is declared outside the match loop and never reset, so a determiner
    // seen in one match still counts for later matches - confirm this is intended.
    boolean hasDT = false;
    this.tPattern = TokenSequencePattern.compile(REGEX.T_DT_VB_NN_END);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()){
        matchWords = tMatcher.groupNodes();
        for (CoreMap cm: matchWords){
            // Work on a copy of the token; fall back to the surface form when no lemma is set.
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) {
                cl.setLemma(cl.word());
            }
            boolean isDT = cl.tag().equals(POS_TAG.DT);
            boolean isNotNER = cl.ner().equals(NE_TYPE.NO_NER);
            // Lower-case with Locale.ROOT so the lookup does not depend on the JVM's default
            // locale (e.g. the Turkish dotless-i mapping would break matches containing 'I').
            boolean containsNEG = Polarity.NEG_WORDS.contains(cl.lemma().toLowerCase(java.util.Locale.ROOT));
            boolean isVerb = CoreNLPUtils.isVerb(cl.tag());
            // Check if the word is a determiner, no ner and the first word in the phrase
            if (isDT && isNotNER && !containsNEG){
                if (cl.index() == this.phrase.getWordCoreLabelList().get(0).index()){
                    remWords.add(cm);
                    hasDT = true;
                } else break;
            }
            // Check if the word is a verb, no ner
            else if (isVerb && isNotNER && !containsNEG){
                // If it's not preceded by DT, check if it's the first word in the phrase
                if (!hasDT) {
                    if (cl.index() == this.phrase.getWordCoreLabelList().get(0).index()){
                        // Only verbs without children in the graph are removed.
                        if (!this.sg.hasChildren(new IndexedWord(cl))){
                            remWords.add(cm);
                        }
                    } else break;
                } else {
                    if (!this.sg.hasChildren(new IndexedWord(cl))){
                        remWords.add(cm);
                    }
                }
            }
        }
        // If the multi-word expression is found in the dictionary - don't drop it
        if (this.isCoreMapListInDictionary(matchWords)){
            matchWords.clear();
            remWords.clear();
            continue;
        }
        this.dropWords(remWords, matchWords);
    }
}