This page collects typical usage examples of the Java class edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation. If you are wondering what CorefChainAnnotation does, how to use it, or want concrete examples, the curated code samples below should help.
CorefChainAnnotation belongs to the edu.stanford.nlp.dcoref.CorefCoreAnnotations package. Thirteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
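Before the examples, here is a minimal, self-contained sketch (not taken from any project below; the class name and sample sentence are illustrative) of the canonical pattern they all share: build a StanfordCoreNLP pipeline that includes the dcoref annotator, annotate a document, and read the coreference chains back out under the CorefChainAnnotation key.

import java.util.Map;
import java.util.Properties;
import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefChainAnnotationDemo {
    public static void main(String[] args) {
        // dcoref needs the full preprocessing chain, up to and including parsing
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation document = new Annotation("Barack Obama was born in Hawaii. He was elected president in 2008.");
        pipeline.annotate(document);
        // CorefChainAnnotation maps each cluster id to its CorefChain;
        // sentence and token offsets inside a chain start at 1
        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (Map.Entry<Integer, CorefChain> entry : chains.entrySet()) {
            System.out.println(entry.getKey() + "\t" + entry.getValue());
        }
    }
}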
Example 1: _unpronoun
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
Stream<Pair<CorefMention, CorefMention>> s =
Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
.filter(Objects::nonNull) // Skip a missing (null) coref map
.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map
.flatMap(entry -> {
// Link each entry to its main mention
CorefMention main = entry.getValue().getRepresentativeMention();
return entry.getValue().getMentionsInTextualOrder().stream()
.filter(mention -> mention != main)
.map(mention -> makePair(mention, main));
});
// Type inference chokes here, so assign to a local and then return.
return s.collect(HashMap::new,
(m, pair) -> m.put(pair.first.headIndex, pair),
(l, r) -> {}); // no-op combiner; safe because the stream is sequential
}
Example 2: test
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
@Test
public void test() throws Exception {
String text = "Johns Hopkins University was started by Johns Hopkins. Johns Hopkins was a good man.";
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
graph.entrySet().forEach(e -> {
LOGGER.info("Got coref key: {}", e.getKey());
LOGGER.info("Got coref val: {}", e.getValue());
e.getValue().getMentionsInTextualOrder().forEach(m -> LOGGER.info("Got mention: {}", m.toString()));
});
LOGGER.info("Got document: {}", document);
LOGGER.info("Got document: {}", document.toString());
AnnotateNonTokenizedConcrete tk = new AnnotateNonTokenizedConcrete();
StanfordPostNERCommunication postNER = tk.annotate(this.nytComm);
postNER.getEntityMentions().forEach(em -> LOGGER.info("Got EM: {}", em));
}
Example 3: getCorefInfo
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
List<Map<String, Object>> getCorefInfo(Annotation doc) {
Map<Integer, CorefChain> corefChains = doc.get(CorefChainAnnotation.class);
// List<CoreMap> sentences = doc.get(SentencesAnnotation.class);
List<Map<String, Object>> entities = new ArrayList<>();
for (CorefChain chain : corefChains.values()) {
List<Map<String, Object>> mentions = new ArrayList<>();
CorefChain.CorefMention representative = chain.getRepresentativeMention();
for (CorefChain.CorefMention corement : chain.getMentionsInTextualOrder()) {
Map<String, Object> outment = new HashMap<>();
outment.put("sentence", corement.sentNum - 1);
outment.put("tokspan_in_sentence", Lists.newArrayList(
corement.startIndex - 1, corement.endIndex - 1));
outment.put("head", corement.headIndex - 1);
outment.put("gender", corement.gender.toString());
outment.put("animacy", corement.animacy.toString());
outment.put("number", corement.number.toString());
outment.put("mentiontype", corement.mentionType.toString());
outment.put("mentionid", corement.mentionID);
if (representative != null && corement.mentionID == representative.mentionID) {
outment.put("representative", true);
}
mentions.add(outment);
}
Map<String, Object> entity = ImmutableMap.<String, Object>builder()
.put("mentions", mentions)
.put("entityid", chain.getChainID())
.build();
entities.add(entity);
}
return entities;
}
Example 4: parseDocumentWithCoref
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public CorenlpParsedArticle parseDocumentWithCoref(int sectionId, String text, Gson gson) {
// l[]: article_name (String), section_id (int), text
CorenlpParsedArticle pa = new CorenlpParsedArticle();
pa.sectionId = sectionId;
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
pa.numSentence = sentences.size();
for (int i = 0; i < sentences.size(); i++) {
CoreMap sentence = sentences.get(i);
ParsedSentence ps = new ParsedSentence(pa.sectionId, i, sentence, gsf);
pa.parsedsentence.add(ps);
}
{
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
// D.p(graph.size());
for (Entry<Integer, CorefChain> e : graph.entrySet()) {
int chainid = e.getKey();
CorefChain cc = e.getValue();
CorefResult cr = new CorefResult();
pa.corefchains.add(cr);
// for (CorefMention m : cc.getCorefMentions()) {
// // for (CorefMention m : cc.getMentionsInTextualOrder()) {
// cr.names.add(m.mentionSpan);
// cr.chain.add(new int[] { m.sentNum - 1, m.startIndex - 1,
// m.endIndex - 1 });
// // D.p(m.toString(), cc.getChainID());
// }
}
graph = null;
}
document = null;
sentences = null;
return pa;
}
Example 5: parseDocumentJsonWithCoref
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public String parseDocumentJsonWithCoref(int sectionId, String text, Gson gson) {
// l[]: article_name (String), section_id (int), text
CorenlpParsedArticle pa = new CorenlpParsedArticle();
pa.sectionId = sectionId;
Annotation document = new Annotation(text);
pipeline.annotate(document);
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
pa.numSentence = sentences.size();
for (int i = 0; i < sentences.size(); i++) {
CoreMap sentence = sentences.get(i);
ParsedSentence ps = new ParsedSentence(pa.sectionId, i, sentence, gsf);
pa.parsedsentence.add(ps);
}
String result = "";
{
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
// D.p(graph.size());
for (Entry<Integer, CorefChain> e : graph.entrySet()) {
int chainid = e.getKey();
CorefChain cc = e.getValue();
CorefResult cr = new CorefResult();
pa.corefchains.add(cr);
// for (CorefMention m : cc.getCorefMentions()) {
// // for (CorefMention m : cc.getMentionsInTextualOrder()) {
// cr.names.add(m.mentionSpan);
// cr.chain.add(new int[] { m.sentNum - 1, m.startIndex - 1,
// m.endIndex - 1 });
// // D.p(m.toString(), cc.getChainID());
// }
}
graph = null;
result = gson.toJson(pa);
}
document = null;
sentences = null;
return result;
}
Example 6: toCoreferenceMap
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
* Transform an Annotation instance into a map of coreference clusters.
*
* @param annotation an annotated document; the dcoref annotator must have been run
* @return a map from coreference cluster id to the cluster's mentions, representative mention first
*/
public static Map<Integer, List<CorefMention>> toCoreferenceMap(Annotation annotation) {
HashMap<Integer, List<CorefMention>> corefs = new HashMap<>();
for (CorefChain chain : annotation.get(CorefChainAnnotation.class).values()) {
CorefMention m1 = chain.getRepresentativeMention();
corefs.put(m1.corefClusterID, new ArrayList<>());
corefs.get(m1.corefClusterID).add(m1);
for (CorefMention m2 : chain.getMentionsInTextualOrder())
if (m2 != m1)
corefs.get(m2.corefClusterID).add(m2);
}
return corefs;
}
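A quick hypothetical usage of toCoreferenceMap, assuming a pipeline variable configured with dcoref as in Example 2 (the input sentence is illustrative):

// assumes: StanfordCoreNLP pipeline built with the dcoref annotator
Annotation doc = pipeline.process("Alice lost her keys. She found them later.");
Map<Integer, List<CorefMention>> clusters = toCoreferenceMap(doc);
// each cluster lists its representative mention first
clusters.forEach((clusterId, mentions) -> System.out.println(clusterId + " -> " + mentions));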
Example 7: getdCoreferencedText
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static ArrayList<String> getdCoreferencedText(String text){
Annotation document = new Annotation(text);
pipeline.annotate(document);
ArrayList<String> sentences = new ArrayList<String>();
DocumentPreprocessor dp = new DocumentPreprocessor(
new StringReader(text));
ArrayList<List<HasWord>> processedText = new ArrayList<List<HasWord>>();
for (List<HasWord> sentence : dp){
processedText.add(sentence);
}
// Replace each mention with its representative mention
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()){
CorefChain c = entry.getValue();
CorefMention cm = c.getRepresentativeMention();
for (Entry<IntPair, Set<CorefMention>> e :
c.getMentionMap().entrySet()){
if (cm.endIndex - cm.startIndex > 2) {
continue; // if the representative mention is longer than two tokens, don't substitute
}
for(CorefMention mention : e.getValue()){
perClusterUpdateSen(processedText,
mention.sentNum,cm.sentNum,
cm.startIndex,cm.endIndex,
mention.startIndex,mention.endIndex);
}
}
}
for (List<HasWord> senlist : processedText){
sentences.add("");
for (HasWord word:senlist){
if (!word.toString().equals("")){
//System.out.print(word.toString()+" ");
String str = sentences.
get(sentences.size()-1) + word.toString().toLowerCase()+" ";
sentences.set(sentences.size()-1, str);
}
}
//System.out.println();
}
for (int i = 0; i < sentences.size(); i++) {
String s = sentences.get(i);
if (s.isEmpty()) continue; // guard against an empty sentence before capitalizing
sentences.set(i, Character.toUpperCase(s.charAt(0)) + s.substring(1));
}
return sentences;
}
Example 8: resolveCoRef
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public String resolveCoRef(String text) {
// to hold the resolved string
String resolved = "";
// run the pipeline
Annotation document = runPipeline(text);
// get all coref chains and sentences
Map<Integer, CorefChain> corefs = document.get(CorefChainAnnotation.class);
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
// process each sentence
for (CoreMap sentence : sentences) {
int curSentIdx = sentence.get(SentenceIndexAnnotation.class);
List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
boolean isPronoun = false;
for (CoreLabel token : tokens) {
// process only pronouns
isPronoun = false;
String pos = token.get(PartOfSpeechAnnotation.class);
if (pos.equals("PRP") || pos.equals("PRP$")) { // PRP = personal pronoun, PRP$ = possessive pronoun (Penn Treebank)
isPronoun = true;
}
Integer corefClustId = token.get(CorefClusterIdAnnotation.class);
CorefChain chain = corefs.get(corefClustId);
// if there is no chain to replace
if (chain == null || chain.getMentionsInTextualOrder().size() == 1 || isPronoun == false) {
resolved += token.word() + token.after();
} else {
int sentIndx = chain.getRepresentativeMention().sentNum - 1;
CorefMention reprMent = chain.getRepresentativeMention();
String rootWord = sentences.get(sentIndx)
.get(TokensAnnotation.class)
.get(reprMent.headIndex - 1)
.originalText();
// replace only pronouns that fall outside the representative mention's own span (endIndex is exclusive)
if (curSentIdx != sentIndx || token.index() < reprMent.startIndex
|| token.index() >= reprMent.endIndex) {
if (Character.isUpperCase(token.originalText().charAt(0))) {
rootWord = WordUtils.capitalize(rootWord);
}
resolved += rootWord + token.after();
} else {
resolved += token.word() + token.after();
}
}
}
}
return resolved;
}
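A brief hypothetical usage of resolveCoRef; the enclosing class name CorefResolver is invented here, and its runPipeline method is assumed to build a pipeline that includes dcoref:

CorefResolver resolver = new CorefResolver(); // hypothetical enclosing class
// pronouns outside the representative mention's span are replaced by its head word
String resolved = resolver.resolveCoRef("John went home because he was tired.");
System.out.println(resolved); // expected along the lines of: "John went home because John was tired."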
Example 9: processParses
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
* Start from parsed trees, and run the coref.
*/
public List<EntityMention> processParses(Collection<Tree> trees) {
CoreLabelTokenFactory tokenfactory = new CoreLabelTokenFactory();
List<EntityMention> entities = null;
// Create an empty Annotation
Annotation document = new Annotation("");
try {
// Setup the sentences using CoreMaps and CoreLabels.
List<CoreMap> sentences = new ArrayList<CoreMap>();
for( Tree tree : trees ) {
List<CoreLabel> sentence = new ArrayList<CoreLabel>();
CoreMap sent = new ArrayCoreMap(1);
sent.set(TokensAnnotation.class,sentence);
sentences.add(sent);
// Now add the leaves from the trees as separate tokens.
List<String> strs = TreeOperator.stringLeavesFromTree(tree);
List<String> pos = TreeOperator.posTagsFromTree(tree);
int start = 0, index = 0;
for( String str : strs ) {
CoreLabel label = tokenfactory.makeToken(str, start, start+str.length());
start += str.length() + 1;
label.set(PartOfSpeechAnnotation.class, pos.get(index++));
sentence.add(label);
}
// Now add the parse tree.
sent.set(TreeAnnotation.class, tree);
}
// Add all sentences as an annotation to the document.
document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
// for( CoreMap sen : sentences ) {
// System.out.println(sen);
// }
// NOTE: You can see each annotator get created in the StanfordCoreNLP.java class.
// Look at its function getDefaultAnnotatorPool()
pipeline.annotate(document);
// System.out.println("AFTER");
// for( CoreMap sen : sentences )
// System.out.println(sen);
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
// for( Integer id : graph.keySet() ) System.out.println(id + "\t" + graph.get(id));
entities = extractEntities(graph);
} catch( Exception ex ) {
System.out.println("--STANFORD COREF EXCEPTION-- Parses skipped...");
ex.printStackTrace();
}
return entities;
}
Example 10: runCoreNLP
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void runCoreNLP()
{
Properties props = new Properties();
// parse and dcoref are required for the tree, dependency, and coreference lookups below
props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// read some text in the text variable
String text = "Hello how are you Ramesh"; // Add your text here!
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// this is the text of the token
//String word = token.get(TextAnnotation.class);
// this is the POS tag of the token
//String pos = token.get(PartOfSpeechAnnotation.class);
// this is the NER label of the token
//String ne = token.get(NamedEntityTagAnnotation.class);
//System.out.println(token+"/"+ne);
}
// this is the parse tree of the current sentence
Tree tree = sentence.get(TreeAnnotation.class);
// this is the Stanford dependency graph of the current sentence
SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
}
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
}
Example 11: main
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void main(String[] args) {
// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// read some text in the text variable
String text = "Ricardo Usbeck sits at his table. He is a researcher. entity is an extinct genus of sauropterygian type."; // Add your text here!
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// this is the text of the token
String word = token.get(TextAnnotation.class);
// this is the POS tag of the token
String pos = token.get(PartOfSpeechAnnotation.class);
// this is the NER label of the token
String ne = token.get(NamedEntityTagAnnotation.class);
System.out.println(word + "\t" + pos + "\t" + ne);
}
// this is the parse tree of the current sentence
Tree tree = sentence.get(TreeAnnotation.class);
System.out.println(tree.toString());
// this is the Stanford dependency graph of the current sentence
SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
System.out.println(dependencies.toString());
}
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
System.out.println(graph.toString());
}
Example 12: process
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void process() {
// Chinese processing requires a dedicated properties file
String[] args = new String[] {"-props", "StanfordCoreNLP-chinese.properties"
//"edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" // Chinese coreference resolution
};
// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
//Properties props = new Properties();
// tokenize: word segmentation; ssplit: sentence splitting; pos: part-of-speech tagging; lemma: lemmatization; parse: syntactic parsing (incl. dependencies); dcoref: coreference resolution; sentiment: sentiment analysis
//props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
Properties props = StringUtils.argsToProperties(args);
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// read some text in the text variable
//String text = "Stanford University is located in California. It is a great university. Meg is the 2nd female CEO of HP! This is a great PC. This PC is not so good after all! 今天天气真好。"; // Add your text here!
String text = "一些盗版制品经营者为了应付和躲避打击,经营手法更为隐蔽。";
// approach 1
Annotation document = pipeline.process(text);
// approach 2
// // create an empty Annotation just with the given text
// Annotation document = new Annotation(text);
// // run all Annotators on this text
// pipeline.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
// iterate over all sentences and print each sentence's processing result
for (CoreMap sentence : sentences) {
System.out.println("--------------------- Begin of one sentence--------------------------");
System.out.println(sentence.toString());
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // TODO: which other annotation classes are available?
// this is the text of the token (the segmented word)
String word = token.get(TextAnnotation.class);
// this is the POS tag of the token, e.g. VBZ (verb), NNP (proper noun), JJ (adjective), DT (determiner)
String pos = token.get(PartOfSpeechAnnotation.class);
// this is the NER label of the token, e.g. ORGANIZATION, LOCATION, DATE, TIME, PERSON, or O; it identifies person, place, and organization names in the text
String ne = token.get(NamedEntityTagAnnotation.class);
System.out.println("word = " + word + " pos = " + pos + " ne = " + ne);
}
// // this is the parse tree of the current sentence
// Tree treeAnnotation = sentence.get(TreeAnnotation.class);
// System.out.println("TreeAnnotation = " + treeAnnotation);
// // this is the Stanford dependency graph of the current sentence
// SemanticGraph dependencies = sentence
// .get(CollapsedCCProcessedDependenciesAnnotation.class);
// sentiment analysis (not supported for Chinese)
Tree treeSentiment = sentence
.get(SentimentAnnotatedTree.class);
int sentiment = RNNCoreAnnotations.getPredictedClass(treeSentiment);
System.out.println(printSentiment(sentiment));
System.out.println("--------------------- End of one sentence--------------------------");
}
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
// coreference chains: which mentions refer to the same entity
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
System.out.println(graph);
}
Example 13: testPipeline
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
* Test from http://nlp.stanford.edu/software/corenlp.shtml
*/
@Test
public void testPipeline() {
// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
Properties props = new Properties();
props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
// create an empty Annotation just with the given text
Annotation document = new Annotation(text);
// run all Annotators on this text
pipeline.annotate(document);
// these are all the sentences in this document
// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
StringBuffer sentenceStringBuffer = new StringBuffer();
for(CoreMap sentence: sentences) {
// traversing the words in the current sentence
// a CoreLabel is a CoreMap with additional token-specific methods
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// this is the text of the token
String word = token.get(TextAnnotation.class);
// this is the POS tag of the token
String pos = token.get(PartOfSpeechAnnotation.class);
// this is the NER label of the token
String ne = token.get(NamedEntityTagAnnotation.class);
sentenceStringBuffer.append(word);
sentenceStringBuffer.append("/");
sentenceStringBuffer.append(pos);
sentenceStringBuffer.append("/");
sentenceStringBuffer.append(ne);
sentenceStringBuffer.append(" ");
}
System.out.println(sentenceStringBuffer.toString());
sentenceStringBuffer = new StringBuffer();
// this is the parse tree of the current sentence
Tree tree = sentence.get(TreeAnnotation.class);
// this is the Stanford dependency graph of the current sentence
SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
}
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
}