本文整理汇总了Java中edu.stanford.nlp.dcoref.CorefChain.CorefMention类的典型用法代码示例。如果您正苦于以下问题:Java CorefMention类的具体用法?Java CorefMention怎么用?Java CorefMention使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CorefMention类属于edu.stanford.nlp.dcoref.CorefChain包,在下文中一共展示了CorefMention类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: extractTokenRefSequence
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* In order to allow for possibly empty mentions, this will always return a
* validating TokenRefSequence, provided m.end >= m.start. When the end
* points are equal, the token index list will be the empty list, and a
* warning will be logged.
*
* @throws AnalyticException
*/
/**
 * Builds a validating TokenRefSequence for the given coref mention,
 * allowing the degenerate case of an empty mention. Provided
 * m.end >= m.start a sequence is always returned; when the end points
 * coincide the token index list is empty and a warning is logged.
 *
 * @param coreMention the CoreNLP coref mention (1-based indices)
 * @param tokUuid UUID of the tokenization the mention belongs to
 * @param representative whether this mention is the chain's representative
 * @throws AnalyticException if the mention's end precedes its start
 */
public static TokenRefSequence extractTokenRefSequence(CorefMention coreMention,
UUID tokUuid, boolean representative) throws AnalyticException {
// CoreNLP indices are 1-based; convert to 0-based.
final int start = coreMention.startIndex - 1;
final int end = coreMention.endIndex - 1;
final int head = coreMention.headIndex - 1;
LOGGER.debug("Working on mention string: {}", coreMention.mentionSpan);
if (start > end) {
throw new AnalyticException(
"Calling extractTokenRefSequence on mention " + coreMention
+ " with head = " + head + ", UUID = " + tokUuid);
}
if (start == end) {
TokenRefSequence empty = new TokenRefSequence();
empty.setTokenizationId(tokUuid).setTokenIndexList(new ArrayList<Integer>());
if (representative) {
empty.setAnchorTokenIndex(head);
}
LOGGER.warn("Creating an EMPTY mention for mention {}, UUID {}", coreMention, tokUuid);
return empty;
}
return extractTokenRefSequence(start, end, head, tokUuid);
}
示例2: main
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Main function
*
* @param args
* @throws Exception
*/
/**
 * Entry point: reads a sample news file, runs the full Stanford pipeline
 * (including dcoref), and prints every coreference cluster that contains
 * more than one mention.
 *
 * @param args unused
 * @throws Exception on any pipeline or I/O failure
 */
public static void main(String[] args) throws Exception {
// Load the input document.
String text = JFile.read(Env.SAMPLE_DIR + "news.txt");
// Initialize the wrapped pipeline with all required annotators.
StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
nlp.loadAll("tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref");
// Annotate and group mentions by coreference cluster id.
Annotation annotation = nlp.annotate(text);
Map<Integer, List<CorefMention>> mentionMap = StanfordNlpWrapper.toCoreferenceMap(annotation);
for (Map.Entry<Integer, List<CorefMention>> cluster : mentionMap.entrySet()) {
List<CorefMention> mentions = cluster.getValue();
if (mentions.size() > 1) {
for (CorefMention m : mentions) {
System.out.println(JString.join("\t", cluster.getKey(),
m.mentionType, m.mentionSpan, m.sentNum, m.headIndex));
}
}
}
}
示例3: _unpronoun
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Maps the head token index of every non-representative coref mention in
 * the phrase to a (mention, representative mention) pair, so pronouns can
 * later be resolved to their canonical antecedent.
 *
 * @param p the phrase whose coref chains are inspected
 * @return map from head token index to (mention, main mention) pair
 */
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
Stream<Pair<CorefMention, CorefMention>> s =
Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
.filter(Objects::nonNull) // Do nothing with an empty map
.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map
.flatMap(entry -> {
// Link each entry to its main mention
CorefMention main = entry.getValue().getRepresentativeMention();
return entry.getValue().getMentionsInTextualOrder().stream()
.filter(mention -> mention != main)
.map(mention -> makePair(mention, main));
});
// Type inference chokes here so write it down then return.
// FIX: the combiner must actually merge partial maps. The previous
// no-op combiner ((l, r) -> {}) would silently drop entries if this
// stream were ever run in parallel.
return s.collect(HashMap::new,
(m, pair) -> m.put(pair.first.headIndex, pair),
HashMap::putAll);
}
示例4: getSpanFromMention
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Returns the {@link Span} in the {@link AnalysedText} which corresponds
* to the given {@link CorefMention}
*
* @param at
* @param sentences
* @param mention
* @return
*/
/**
 * Returns the {@link Span} in the {@link AnalysedText} which corresponds
 * to the given {@link CorefMention}.
 *
 * @param at the analysed text the span is added to
 * @param sentences the CoreNLP sentences of the document
 * @param mention the coref mention to translate (1-based indices)
 * @return a chunk span for multi-token mentions, a token span otherwise
 */
private Span getSpanFromMention(AnalysedText at, List<CoreMap> sentences, CorefMention mention) {
// CoreNLP sentence and token indices are 1-based.
List<CoreLabel> tokens =
sentences.get(mention.sentNum - 1).get(TokensAnnotation.class);
boolean multiToken = mention.endIndex - mention.startIndex > 1;
if (!multiToken) {
CoreLabel only = tokens.get(mention.startIndex - 1);
return at.addToken(only.beginPosition(), only.endPosition());
}
CoreLabel first = tokens.get(mention.startIndex - 1);
CoreLabel last = tokens.get(mention.endIndex - 2);
return at.addChunk(first.beginPosition(), last.endPosition());
}
示例5: makeEntity
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Converts a CoreNLP coref chain into a concrete Entity, registering an
 * EntityMention for the representative mention and for every other
 * mention in the chain.
 *
 * @param chain the coref chain to convert
 * @param ems the mention set receiving every created EntityMention
 * @param tokenizations per-sentence tokenizations (0-based list)
 * @return the populated Entity
 * @throws AnalyticException if a token ref sequence fails validation
 */
private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations) throws AnalyticException {
Entity entity = new Entity().setUuid(this.gen.next());
CorefChain.CorefMention head = chain.getRepresentativeMention();
// CoreNLP uses 1-based sentence numbers; our list is 0-based.
Tokenization headTkz = tokenizations.get(head.sentNum - 1);
UUID headTkzUuid = headTkz.getUuid();
LOGGER.debug("Creating EntityMention based on tokenization: {}", headTkzUuid.getUuidString());
EntityMention headMention = makeEntityMention(head, headTkzUuid, true);
// TODO: below throws if they're invalid. maybe this can be removed in the future.
this.validateTokenRefSeqValidity(headMention.getTokens(), headTkz);
entity.setCanonicalName(head.mentionSpan);
entity.addToMentionIdList(headMention.getUuid());
ems.addToMentionList(headMention);
for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
if (mention == head) {
continue;
}
// Same 1-based to 0-based sentence adjustment as above.
Tokenization tkz = tokenizations.get(mention.sentNum - 1);
EntityMention em = this.makeEntityMention(mention, tkz.getUuid(), false);
this.validateTokenRefSeqValidity(em.getTokens(), tkz);
ems.addToMentionList(em);
entity.addToMentionIdList(em.getUuid());
}
return entity;
}
示例6: makeEntityMention
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Wraps a CoreNLP coref mention as a concrete EntityMention bound to the
 * given tokenization.
 *
 * @param coreMention the CoreNLP mention to wrap
 * @param tokenizationUuid UUID of the tokenization the mention belongs to
 * @param representative whether this is the chain's representative mention
 * @return the new EntityMention
 * @throws AnalyticException if the token span cannot be extracted
 */
private EntityMention makeEntityMention(CorefChain.CorefMention coreMention, UUID tokenizationUuid, boolean representative) throws AnalyticException {
EntityMention mention = new EntityMention().setUuid(this.gen.next());
TokenRefSequence tokens =
extractTokenRefSequence(coreMention, tokenizationUuid, representative);
mention.setTokens(tokens);
mention.setText(coreMention.mentionSpan);
// TODO: we could possibly add mention types. We could use a feature of
// CoreNLP:
// MentionType mentionType = coreMention.mentionType;
// or we could use a heuristic (see concrete-agiga).
// String emType = getEntityMentionType(em, tokenization);
return mention;
}
示例7: cr
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* 对输入文本进行指代消解<br>
* 以指代链中最先出现的指代作为被指代的词(短语)<br>
* key:MD5(element + sentNum + startIndex + endIndex)
*
* @param graph 指代链
*
* @return
*/
/**
 * Performs coreference resolution over the parsed coref graph.
 * The mention that appears first in each chain is taken as the referent
 * for every later mention in that chain.
 * Result key: MD5(element + sentNum + startIndex + endIndex)
 *
 * @param graph coref chains keyed by cluster id
 * @return map from MD5 key to the resolved coreference element
 */
private Map<String, CoreferenceElement> cr(Map<Integer, CorefChain> graph) {
Map<String, CoreferenceElement> result = new HashMap<String, CoreferenceElement>();
if (MapUtils.isEmpty(graph)) {
return result;
}
for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
List<CorefMention> mentions = entry.getValue().getMentionsInTextualOrder();
if (mentions.size() <= 1) {
continue;
}
// The first mention in textual order is treated as the referent.
CorefMention first = mentions.get(0);
CoreferenceElement ref = new CoreferenceElement(first.mentionSpan, first.corefClusterID, first.startIndex, first.endIndex, first.sentNum, null);
for (int k = 1; k < mentions.size(); k++) {
CorefMention mention = mentions.get(k);
if (mention == null) {
continue;
}
CoreferenceElement element = new CoreferenceElement(mention.mentionSpan, mention.corefClusterID, mention.startIndex, mention.endIndex, mention.sentNum, ref);
try {
result.put(Encipher.MD5(element.getElement() + element.getSentNum() + element.getStartIndex() + element.getEndIndex()), element);
} catch (NoSuchAlgorithmException | UnsupportedEncodingException e) {
this.log.error("MD5 encode error!", e);
}
}
}
return result;
}
示例8: extractEntities
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Flattens every coref chain in the graph into a list of EntityMentions.
 *
 * @param graph coref chains keyed by cluster id
 * @return one EntityMention per coref mention, in chain order
 */
private List<EntityMention> extractEntities(Map<Integer, CorefChain> graph) {
List<EntityMention> mentions = new ArrayList<EntityMention>();
for (CorefChain chain : graph.values()) {
for (CorefMention cmen : chain.getMentionsInTextualOrder()) {
// endIndex is shifted down by one to match EntityMention's
// expected end offset — presumably exclusive-to-inclusive; verify
// against EntityMention's contract.
mentions.add(new EntityMention(cmen.sentNum, cmen.mentionSpan,
cmen.startIndex, cmen.endIndex - 1, cmen.corefClusterID));
}
}
return mentions;
}
示例9: toCoreferenceMap
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Transform an Annotation instance into a map of coreference clusters
*
* @param annotation
* @return
*/
/**
 * Transform an Annotation instance into a map of coreference clusters,
 * keyed by cluster id, with the representative mention first in each list.
 *
 * @param annotation a document annotated with dcoref
 * @return cluster id to mentions, representative mention first
 */
public static Map<Integer, List<CorefMention>> toCoreferenceMap(Annotation annotation) {
Map<Integer, List<CorefMention>> corefs = new HashMap<Integer, List<CorefMention>>();
for (CorefChain chain : annotation.get(CorefChainAnnotation.class).values()) {
CorefMention representative = chain.getRepresentativeMention();
List<CorefMention> cluster = new ArrayList<CorefMention>();
cluster.add(representative);
corefs.put(representative.corefClusterID, cluster);
for (CorefMention other : chain.getMentionsInTextualOrder()) {
if (other != representative) {
corefs.get(other.corefClusterID).add(other);
}
}
}
return corefs;
}
示例10: generatePronounEdges
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Returns some new rules learned about a pronoun given its match
* context from anaphora resolution.
*
* Specifically, we fill in the tags
*
* _animate(main mention, ___).
* _gender(main mention, ___).
* _number(main mention, ___).
*
* Basically, we can tell if it is animate, it's gender, and it's count.
* @return A list of semantic notes.
*/
/**
 * Returns new rules learned about a pronoun from its anaphora-resolution
 * match context.
 *
 * Specifically, fills in the tags
 *
 * _animate(main mention, ___).
 * _gender(main mention, ___).
 * _number(main mention, ___).
 *
 * i.e. whether the referent is animate, its gender, and its number,
 * whenever those features are known.
 *
 * @return A list of semantic notes.
 */
public static List<Edge> generatePronounEdges(
SemanticGraph g, IndexedWord w, Phrase t) {
List<Edge> edges = new ArrayList<>();
Pair<CorefMention, CorefMention> mentionEdge = t.getUnpronoun().get(w.index());
if (mentionEdge == null) {
// The word was not linked to an antecedent; nothing to learn.
return edges;
}
// Use what we know about the pronoun's antecedent.
String mainNoun = Trees.concatNoun(g, g.getNodeByIndex(mentionEdge.second.headIndex));
Animacy animacy = mentionEdge.first.animacy;
if (animacy != Animacy.UNKNOWN) {
edges.add(new Edge(mainNoun, "_animate", animacy.toString()));
}
Gender gender = mentionEdge.first.gender;
if (gender != Gender.UNKNOWN) {
edges.add(new Edge(mainNoun, "_gender", gender.toString()));
}
Dictionaries.Number number = mentionEdge.first.number;
if (number != Dictionaries.Number.UNKNOWN) {
edges.add(new Edge(mainNoun, "_number", number.toString()));
}
return edges;
}
示例11: getMainMention
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Get the full text of the main mention of a particular word, if it has a
* better mention. Otherwise just get it's segment of the tree using
* concatNoun()
*
* @param phrase
* @param w
* @return
*/
/**
 * Get the full text of the main mention of a particular word, if it has
 * a better mention; otherwise just get its segment of the tree using
 * concatNoun().
 *
 * @param phrase the phrase holding pronoun-resolution links
 * @param graph the dependency graph containing the word
 * @param word the word to resolve
 * @return the representative mention's text, or the word's noun phrase
 */
public static String getMainMention(
Phrase phrase, SemanticGraph graph, IndexedWord word) {
Pair<CorefMention, CorefMention> links = phrase.getUnpronoun().get(word.index());
return links == null
? Trees.concatNoun(graph, word)
: links.second.mentionSpan;
}
示例12: getLinks
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Collects ordered pairs of mention positions from every coref chain:
 * one link (m1, m2) for each pair of mentions in the same chain where
 * the MentionComparator orders m1 strictly after m2.
 *
 * @param result coref chains keyed by cluster id
 * @return list of (position, position) links
 */
public static List<Pair<IntTuple, IntTuple>> getLinks(
Map<Integer, CorefChain> result) {
List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>();
MentionComparator comparator = new MentionComparator();
for (CorefChain chain : result.values()) {
List<CorefMention> mentions = chain.getMentionsInTextualOrder();
for (CorefMention later : mentions) {
for (CorefMention earlier : mentions) {
if (comparator.compare(later, earlier) == 1) {
links.add(new Pair<IntTuple, IntTuple>(later.position, earlier.position));
}
}
}
}
return links;
}
示例13: addCorefMentions
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
* Adds annotations for coref mentions to the {@link Span}s in the {@link AnalysedText}
*
* @param graph
* @param at
* @param sentences
*/
/**
 * Adds annotations for coref mentions to the {@link Span}s in the
 * {@link AnalysedText}.
 *
 * @param graph coref chains keyed by cluster id
 * @param at the analysed text receiving the annotations
 * @param sentences the CoreNLP sentences of the document
 */
private void addCorefMentions(Map<Integer, CorefChain> graph, AnalysedText at, List<CoreMap> sentences) {
for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
CorefChain chain = entry.getValue();
List<CorefMention> mentions = chain.getMentionsInTextualOrder();
/*
 * Chains with a single mention hold only the representative mention
 * and nothing else in the text, so they carry no useful links.
 */
if (mentions.size() < 2) {
continue;
}
CorefMention representative = chain.getRepresentativeMention();
for (CorefMention mention : mentions) {
Span span = getSpanFromMention(at, sentences, mention);
// Spans of every *other* mention in the same chain.
Set<Span> others = new HashSet<Span>();
for (CorefMention other : mentions) {
if (!other.equals(mention)) {
others.add(getSpanFromMention(at, sentences, other));
}
}
span.addAnnotation(COREF_ANNOTATION,
Value.value(new CorefFeature(mention.equals(representative), others)));
}
}
}
示例14: getdCoreferencedText
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Rewrites the input text so that every coref mention is replaced by its
 * chain's representative mention, then returns the rewritten sentences
 * (lower-cased, first letter capitalized).
 *
 * @param text the raw input text
 * @return one rewritten string per sentence
 */
public static ArrayList<String> getdCoreferencedText(String text){
Annotation document = new Annotation(text);
pipeline.annotate(document);
ArrayList<String> sentences = new ArrayList<String>();
DocumentPreprocessor dp = new DocumentPreprocessor(
new StringReader(text));
ArrayList<List<HasWord>> processedText = new ArrayList<List<HasWord>>();
for (List<HasWord> sentence : dp){
processedText.add(sentence);
}
// Replace each mention with its chain's representative mention.
Map<Integer, CorefChain> graph =
document.get(CorefChainAnnotation.class);
for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()){
CorefChain c = entry.getValue();
CorefMention cm = c.getRepresentativeMention();
for (Entry<IntPair, Set<CorefMention>> e :
c.getMentionMap().entrySet()){
if (cm.endIndex - cm.startIndex > 2){
// Skip replacement when the representative mention is longer
// than two tokens, to avoid bloating the sentence.
continue;
}
for (CorefMention mention : e.getValue()){
perClusterUpdateSen(processedText,
mention.sentNum, cm.sentNum,
cm.startIndex, cm.endIndex,
mention.startIndex, mention.endIndex);
}
}
}
// Reassemble each sentence, lower-casing every token.
// FIX: use StringBuilder instead of repeated String concatenation
// (the original rebuilt the sentence string on every token).
for (List<HasWord> senlist : processedText){
StringBuilder sb = new StringBuilder();
for (HasWord word : senlist){
if (!word.toString().equals("")){
sb.append(word.toString().toLowerCase()).append(' ');
}
}
sentences.add(sb.toString());
}
// Capitalize the first character of each non-empty sentence.
// FIX: the original indexed charAt(0) unconditionally, which throws
// StringIndexOutOfBoundsException on an empty sentence.
for (int i = 0; i < sentences.size(); i++){
String s = sentences.get(i);
if (!s.isEmpty()){
sentences.set(i, ("" + s.charAt(0)).toUpperCase() + s.substring(1));
}
}
return sentences;
}
示例15: resolveCoRef
import edu.stanford.nlp.dcoref.CorefChain.CorefMention; //导入依赖的package包/类
/**
 * Replaces pronouns in the text with the head word of their coref
 * chain's representative mention, preserving the original whitespace
 * and capitalization.
 *
 * @param text the raw input text
 * @return the text with pronouns substituted
 */
public String resolveCoRef(String text) {
// FIX: accumulate into a StringBuilder instead of repeated String
// concatenation (the original += was O(n^2) over the document).
StringBuilder resolved = new StringBuilder();
// run the pipeline
Annotation document = runPipeline(text);
// get all coref chains and sentences
Map<Integer, CorefChain> corefs = document.get(CorefChainAnnotation.class);
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
// process each sentence
for (CoreMap sentence : sentences) {
int curSentIdx = sentence.get(SentenceIndexAnnotation.class);
List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
for (CoreLabel token : tokens) {
// Only pronouns are candidates for replacement.
// FIX: the Penn Treebank tag for possessive pronouns is "PRP$";
// the original tested "PP$", which the tagger never emits, so
// possessive pronouns were silently never resolved.
String pos = token.get(PartOfSpeechAnnotation.class);
boolean isPronoun = pos.equals("PRP") || pos.equals("PRP$");
Integer corefClustId = token.get(CorefClusterIdAnnotation.class);
CorefChain chain = corefs.get(corefClustId);
// if there is no chain to replace
if (chain == null || chain.getMentionsInTextualOrder().size() == 1 || !isPronoun) {
resolved.append(token.word()).append(token.after());
} else {
CorefMention reprMent = chain.getRepresentativeMention();
int sentIndx = reprMent.sentNum - 1;
String rootWord = sentences.get(sentIndx)
.get(TokensAnnotation.class)
.get(reprMent.headIndex - 1)
.originalText();
// Replace only when the token is outside the representative
// mention itself.
if (curSentIdx != sentIndx || token.index() < reprMent.startIndex
|| token.index() > reprMent.endIndex) {
// Preserve the original token's capitalization.
if (Character.isUpperCase(token.originalText().charAt(0))) {
rootWord = WordUtils.capitalize(rootWord);
}
resolved.append(rootWord).append(token.after());
} else {
resolved.append(token.word()).append(token.after());
}
}
}
}
return resolved.toString();
}