本文整理汇总了Java中edu.stanford.nlp.util.CoreMap类的典型用法代码示例。如果您正苦于以下问题:Java CoreMap类的具体用法?Java CoreMap怎么用?Java CoreMap使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CoreMap类属于edu.stanford.nlp.util包,在下文中一共展示了CoreMap类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractSentences
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Splits a document into an array of sentences using tokenize + ssplit.
 *
 * @param text the document text to split
 * @return one entry per sentence, in document order
 * @throws Exception if the pipeline fails to annotate the text
 */
public static String[] extractSentences(String text) throws Exception {
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit");
    // Bug fix: props was built but never passed, so the no-arg constructor
    // ignored the intended "tokenize, ssplit" configuration and loaded the
    // default annotator set instead.
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    String[] sentenceList = new String[sentences.size()];
    for (int i = 0; i < sentenceList.length; i++) {
        sentenceList[i] = sentences.get(i).toString();
    }
    return sentenceList;
}
示例2: getStanfordSentimentRate
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Scores the sentiment of the given text by summing, over every sentence,
 * the predicted sentiment class centered on neutral (class - 2).
 */
public int getStanfordSentimentRate(String sentimentText) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    int totalRate = 0;
    // Process each '.'-delimited fragment independently.
    for (String fragment : sentimentText.split("\\.")) {
        if (fragment == null) {
            continue;
        }
        Annotation annotation = pipeline.process(fragment);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
            // Subtracting 2 centers the predicted class on neutral.
            totalRate += RNNCoreAnnotations.getPredictedClass(tree) - 2;
        }
    }
    return totalRate;
}
示例3: tokenize
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Tokenizes the text into representative lemma forms, skipping stopwords
 * both before and after the representative lookup.
 */
public LinkedList<String> tokenize(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text == null) {
        return res;
    }
    Annotation qaTokens = new Annotation(text);
    pipelineTokens.annotate(qaTokens);
    // Walk sentence by sentence, token by token.
    for (CoreMap sentenceTokens : qaTokens.get(CoreAnnotations.SentencesAnnotation.class)) {
        ArrayList<CoreLabel> tokens =
                (ArrayList<CoreLabel>) sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
        for (CoreLabel t : tokens) {
            String lemma = t.lemma();
            if (stopwords.contains(lemma)) {
                continue;
            }
            // Map the lemma to its representative form; drop it if that
            // representative is itself a stopword.
            String rep = representativeProvider.getRepresentative(lemma, t.tag());
            if (!stopwords.contains(rep)) {
                res.add(rep);
            }
        }
    }
    return res;
}
示例4: annotate
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * (Re-)annotates the given annotation. A document with no sentence split yet
 * is annotated from scratch; a single-sentence document first has its stale
 * natural-logic and dependency annotations cleared before re-annotation.
 * Multi-sentence documents are left untouched.
 */
protected void annotate(StanfordCoreNLP pipeline, Annotation ann) {
    List<CoreMap> sentences = ann.get(CoreAnnotations.SentencesAnnotation.class);
    if (sentences == null) {
        // No sentence split yet: annotate from scratch.
        pipeline.annotate(ann);
        return;
    }
    if (sentences.size() != 1) {
        return;
    }
    CoreMap sentence = sentences.get(0);
    // Clear per-token natural-logic annotations.
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
        token.remove(NaturalLogicAnnotations.OperatorAnnotation.class);
        token.remove(NaturalLogicAnnotations.PolarityAnnotation.class);
    }
    // Clear sentence-level triple and dependency annotations.
    sentence.remove(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
    sentence.remove(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
    sentence.remove(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    sentence.remove(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    sentence.remove(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
    pipeline.annotate(ann);
}
示例5: minimizeSubject
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Minimizes a subject phrase: always runs the safe minimization, then — only
 * if the subject is not a known collocation — applies the dictionary-based
 * noun-phrase, verb-before-noun, and named-entity minimizations.
 */
public static void minimizeSubject(AnnotatedPhrase subject, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
    // Safe minimization is always performed first.
    SubjSafeMinimization.minimizeSubject(subject, sg);
    // Frequent (collocation) subjects get nothing beyond the safe pass.
    String lemmaKey = CoreNLPUtils.listOfWordsToLemmaString(subject.getWordList()).toLowerCase();
    if (collocations.contains(lemmaKey)) {
        return;
    }
    // Reusable scratch lists: words to remove, and regex-matched words.
    List<CoreMap> wordsToRemove = new ArrayList<>();
    List<CoreMap> matchedWords = new ArrayList<>();
    Minimization minimizer = new Minimization(subject, sg, collocations);
    minimizer.nounPhraseDictMinimization(wordsToRemove, matchedWords);
    minimizer.removeVerbsBeforeNouns(wordsToRemove, matchedWords);
    minimizer.namedEntityDictionaryMinimization(wordsToRemove, matchedWords);
}
示例6: minimizeRelation
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Minimize only the relations that are considered to have "non-frequent patterns".
 *
 * @param rel the relation phrase
 * @param sg semantic graph of the sentence
 * @param collocations dictionary of multi-word expressions (frequent relations)
 */
public static void minimizeRelation(AnnotatedPhrase rel, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
    // Do the safe minimization first
    RelSafeMinimization.minimizeRelation(rel, sg);
    // If the relation is frequent, don't minimize anything
    if (collocations.contains(CoreNLPUtils.listOfWordsToLemmaString(rel.getWordList()).toLowerCase())){
        return;
    }
    // Bug fix: the original called RelSafeMinimization.minimizeRelation a
    // second time here (copy-paste duplicate of the call above); removed.
    // remWords: list of words to be removed (reusable variable)
    // matchWords: list of matched words from the regex (reusable variable)
    List<CoreMap> remWords = new ArrayList<>();
    List<CoreMap> matchWords = new ArrayList<>();
    // Move to the dict. minimization of the noun phrases within the relation
    Minimization simp = new Minimization(rel, sg, collocations);
    simp.nounPhraseDictMinimization(remWords, matchWords);
    simp.namedEntityDictionaryMinimization(remWords, matchWords);
}
示例7: dropWords
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Given a list of words to be removed and a list of matched words, remove the
 * words from the phrase (recording them, plus their parent edges, as dropped)
 * and empty both lists so they can be reused by the caller.
 *
 * @param remWords words to remove from the phrase (cleared on return)
 * @param matchWords regex-matched words (cleared on return)
 */
public void dropWords(List<CoreMap> remWords, List<CoreMap> matchWords){
    matchWords.clear();
    // In addition to removing the words, save their parent edges separately.
    ObjectArrayList<SemanticGraphEdge> droppedEdges =
            CoreNLPUtils.listOfCoreMapWordsToParentEdges(this.sg, remWords);
    this.phrase.addDroppedEdges(droppedEdges);
    this.phrase.addDroppedWords(CoreNLPUtils.getWordListFromCoreMapList(remWords));
    // Remove the words from the phrase itself, then reset the input list.
    this.phrase.removeCoreLabelWordsFromList(remWords);
    remWords.clear();
}
示例8: namedEntityDictionaryMinimization
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/** Given a phrase, if it contains NERs, make a dictionary minimization around them **/
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords){
// If (.* DT+ [RB|JJ]* NER+ .*) => drop (DT+)
this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
while (this.tMatcher.find()){
matchWords = tMatcher.groupNodes();
for (CoreMap cm: matchWords){
CoreLabel cl = new CoreLabel(cm);
if (cl.lemma() == null) cl.setLemma(cl.word());
// Check if the word is DT, drop it
if ((CoreNLPUtils.isAdj(cl.tag()) || CoreNLPUtils.isAdverb(cl.tag()))
&& cl.ner().equals(NE_TYPE.NO_NER)){
remWords.add(cm);
}
}
// Drop the words not found in dict.
this.dropWordsNotFoundInDict(matchWords, remWords);
}
// Do the safe minimization
this.namedEntitySafeMinimization(remWords, matchWords);
}
示例9: minimizeObject
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Minimize only the objects that are considered to have "non-frequent patterns".
 *
 * @param obj the object phrase
 * @param sg semantic graph of the sentence
 * @param collocations dictionary of multi-word expressions (frequent objects)
 */
public static void minimizeObject(AnnotatedPhrase obj, SemanticGraph sg, ObjectOpenHashSet<String> collocations){
// Do the safe minimization first
ObjSafeMinimization.minimizeObject(obj, sg);
// If the object is frequent, don't minimize anything
if (collocations.contains(CoreNLPUtils.listOfWordsToLemmaString(obj.getWordList()).toLowerCase())){
return;
}
// Minimization object
Minimization simp = new Minimization(obj, sg, collocations);
// remWords: list of words to be removed (reusable variable)
// matchWords: list of matched words from the regex (reusable variable)
List<CoreMap> remWords = new ArrayList<>();
List<CoreMap> matchWords = new ArrayList<>();
// Dictionary minimization of the noun phrases and named entities within the object phrase
simp.nounPhraseDictMinimization(remWords, matchWords);
simp.namedEntityDictionaryMinimization(remWords, matchWords);
}
示例10: lemmatize
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Lemmatizes the document: returns one list of lemmas per sentence, in
 * document order.
 */
public List<List<String>> lemmatize(String documentText)
{
    // Annotate the whole document, then collect per-sentence lemma lists.
    Annotation document = new Annotation(documentText);
    this.parser.annotate(document);
    List<List<String>> lemmas = new ArrayList<List<String>>();
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        List<String> sentenceLemmas = new ArrayList<String>();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            sentenceLemmas.add(token.get(LemmaAnnotation.class));
        }
        lemmas.add(sentenceLemmas);
    }
    return lemmas;
}
示例11: tagAndTokenize
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Tags and tokenizes the document in one pass.
 *
 * @return a pair of (POS tags, surface tokens), aligned index-by-index
 */
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
    List<String> tags = new ArrayList<String>();
    List<String> tokens = new ArrayList<String>();
    // Annotate the whole document, then walk its sentences.
    Annotation document = new Annotation(documentText);
    this.parser.annotate(document);
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Collect the POS tag and surface form of every token.
            tags.add(token.get(PartOfSpeechAnnotation.class));
            tokens.add(token.word());
        }
    }
    return new Pair<List<String>, List<String>>(tags, tokens);
}
示例12: tag
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Returns the part-of-speech tag of every token in the document, in order.
 */
public List<String> tag(String documentText)
{
    List<String> tags = new ArrayList<String>();
    // Annotate the whole document, then walk its sentences.
    Annotation document = new Annotation(documentText);
    this.parser.annotate(document);
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            tags.add(token.get(PartOfSpeechAnnotation.class));
        }
    }
    return tags;
}
示例13: prepareSUTParser
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Prepares the check for a temporal expression.
 *
 * @param cell
 *            Holds the column´s cell
 * @param pipeline
 *            Used for temporal expressions.
 * @param result
 *            Holds the intermediate result before executing this operation.
 * @return Holds the intermediate result after executing this operation.
 */
private int prepareSUTParser(String cell, AnnotationPipeline pipeline,
        int result) {
    // Hoisted: the original recomputed cell.trim() for every comparison.
    String value = cell.trim();
    // Placeholder values that should never be parsed as temporal expressions.
    boolean isPlaceholder = value.isEmpty() || value.equals("-")
            || value.equals("--") || value.equals("---")
            || value.equals("n/a") || value.equals("N/A")
            || value.equals("(n/a)") || value.equals("Unknown")
            || value.equals("unknown") || value.equals("?")
            || value.equals("??") || value.equals(".")
            || value.equals("null") || value.equals("NULL")
            || value.equals("Null");
    if (!isPlaceholder) {
        Annotation annotation = new Annotation(cell);
        // Fixed reference date so relative expressions resolve deterministically.
        annotation.set(CoreAnnotations.DocDateAnnotation.class,
                "2013-07-14");
        pipeline.annotate(annotation);
        List<CoreMap> timexAnnsAll = annotation
                .get(TimeAnnotations.TimexAnnotations.class);
        // Count the cell if at least one temporal expression was found.
        if (timexAnnsAll != null && !timexAnnsAll.isEmpty()) {
            result++;
        }
    }
    return result;
}
示例14: main
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Demo: runs the Chinese CoreNLP pipeline on a sample sentence and prints
 * every token of the first sentence.
 */
public static void main(String[] args) {
    // Load the pipeline from a custom properties file (Chinese models).
    StanfordCoreNLP pipeline = new StanfordCoreNLP("CoreNLP-chinese.properties");
    // Annotate a sample sentence; the text is the process() argument.
    Annotation annotation = pipeline.process("我爱北京天安门");
    // Fetch the sentence list and take the first sentence.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);
    // Print each token (character/word) on its own line.
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    System.out.println("字/词");
    System.out.println("-----------------------------");
    for (CoreLabel token : tokens) {
        // String pos = token.getString(PartOfSpeechAnnotation.class);
        // String ner = token.getString(NamedEntityTagAnnotation.class);
        System.out.println(token.getString(TextAnnotation.class));
    }
}
示例15: findSentiment
import edu.stanford.nlp.util.CoreMap; //导入依赖的package包/类
/**
 * Returns the predicted sentiment class of the longest sentence in the tweet;
 * 0 for a null or empty tweet.
 */
public static int findSentiment(String tweet) {
    int mainSentiment = 0;
    if (tweet == null || tweet.isEmpty()) {
        return mainSentiment;
    }
    Annotation annotation = pipeline.process(tweet);
    int longest = 0;
    for (CoreMap sentence : annotation
            .get(CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence
                .get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
        // Keep the sentiment of the longest sentence seen so far.
        String partText = sentence.toString();
        if (partText.length() > longest) {
            mainSentiment = sentiment;
            longest = partText.length();
        }
    }
    return mainSentiment;
}