

Java CorefMention Class Code Examples

This article collects typical usage examples of the Java class edu.stanford.nlp.dcoref.CorefChain.CorefMention. If you are wondering what the CorefMention class is for, how to use it, or what real-world CorefMention code looks like, the curated examples below may help.


CorefMention is a nested class of CorefChain in the edu.stanford.nlp.dcoref package. The 15 code examples below show how it is used in real projects, sorted by popularity by default.
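Before the project-specific examples, here is a minimal, self-contained sketch that is not taken from any of the projects below; the class name CorefMentionDemo and the sample sentence are made up for illustration, and it assumes a Stanford CoreNLP 3.x distribution with the dcoref models on the classpath. It shows where CorefMention objects come from: run a pipeline that ends with the dcoref annotator, read the CorefChainAnnotation map, and walk the mentions of each chain.

import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefMentionDemo {
    public static void main(String[] args) {
        // dcoref needs the full preprocessing chain up to (and including) parsing
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation(
            "Barack Obama was born in Hawaii. He was elected president in 2008.");
        pipeline.annotate(document);

        // one CorefChain per cluster; each chain exposes its CorefMention objects
        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            CorefMention representative = chain.getRepresentativeMention();
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(chain.getChainID() + "\t" + mention.mentionSpan
                    + "\t(sentence " + mention.sentNum
                    + ", tokens " + mention.startIndex + "-" + mention.endIndex
                    + ", representative: " + (mention == representative) + ")");
            }
        }
    }
}

Note that sentNum, startIndex, and endIndex are 1-based, which is why several of the examples below subtract one to get 0-based sentence and token offsets.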

Example 1: extractTokenRefSequence

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * In order to allow for possibly empty mentions, this will always return a
 * valid TokenRefSequence, provided m.end >= m.start. When the end
 * points are equal, the token index list will be the empty list, and a
 * warning will be logged.
 *
 * @throws AnalyticException
 */
public static TokenRefSequence extractTokenRefSequence(CorefMention coreMention,
    UUID tokUuid, boolean representative) throws AnalyticException {
  int start = coreMention.startIndex - 1;
  int end   = coreMention.endIndex - 1;
  LOGGER.debug("Working on mention string: {}", coreMention.mentionSpan);
  int head = coreMention.headIndex - 1;
  if (end - start < 0) {
    throw new AnalyticException(
        "Calling extractTokenRefSequence on mention " + coreMention
            + " with head = " + head + ", UUID = " + tokUuid);
  } else if (end == start) {
    TokenRefSequence tb = new TokenRefSequence();
    tb.setTokenizationId(tokUuid).setTokenIndexList(new ArrayList<Integer>());
    if (representative)
      tb.setAnchorTokenIndex(head);

    LOGGER.warn("Creating an EMPTY mention for mention {}, UUID {}", coreMention, tokUuid);
    return tb;
  }
  return extractTokenRefSequence(start, end, head, tokUuid);
}
 
Developer: hltcoe, Project: concrete-stanford-deprecated2, Lines: 30, Source: CorefManager.java

Example 2: main

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Main function
 * 
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
	// data input
	String text = JFile.read(Env.SAMPLE_DIR + "news.txt");

	// model loading
	StanfordNlpWrapper nlp = new StanfordNlpWrapper(Env.STANFORDNLP_CFG);
	nlp.loadAll("tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref");

	// task run
	Annotation annotation = nlp.annotate(text);
	Map<Integer, List<CorefMention>> mention_map = StanfordNlpWrapper.toCoreferenceMap(annotation);
	for (Integer id : mention_map.keySet()) {
		List<CorefMention> mentions = mention_map.get(id);
		if (mentions.size() > 1)
			for (CorefMention m : mentions)
				System.out.println(JString.join("\t", id, 
						m.mentionType, m.mentionSpan, m.sentNum, m.headIndex));
	}
}
 
Developer: hakchul77, Project: irnlp_toolkit, Lines: 26, Source: CoreferenceResolution.java

Example 3: _unpronoun

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
	Stream<Pair<CorefMention, CorefMention>> s =
			Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
		.filter(Objects::nonNull)  // Do nothing with an empty map
		.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map
		.flatMap(entry -> {
			// Link each entry to its main mention
			CorefMention main = entry.getValue().getRepresentativeMention();
			return entry.getValue().getMentionsInTextualOrder().stream()
				.filter(mention -> mention != main)
				.map(mention -> makePair(mention, main));
		});
	// Type inference chokes here so write it down then return.
	return s.collect(HashMap::new,
			(m, pair) -> m.put(pair.first.headIndex, pair),
			(l, r) -> {});
}
 
Developer: SeanTater, Project: uncc2014watsonsim, Lines: 18, Source: Phrase.java

Example 4: getSpanFromMention

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Returns the {@link Span} in the {@link AnalysedText} which corresponds
 * to the given {@link CorefMention}
 * 
 * @param at
 * @param sentences
 * @param mention
 * @return
 */
private Span getSpanFromMention(AnalysedText at, List<CoreMap> sentences, CorefMention mention) {
    CoreMap sentence = sentences.get(mention.sentNum - 1);
    List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
    
    if (mention.endIndex - mention.startIndex > 1) {
        CoreLabel startToken = tokens.get(mention.startIndex - 1);
        CoreLabel endToken = tokens.get(mention.endIndex - 2);
        
        return at.addChunk(startToken.beginPosition(), endToken.endPosition());
    } else {
        CoreLabel token = tokens.get(mention.startIndex - 1);
        
        return at.addToken(token.beginPosition(), token.endPosition());
    }
}
 
Developer: westei, Project: stanbol-stanfordnlp, Lines: 25, Source: StanfordNlpAnalyzer.java

Example 5: makeEntity

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations) throws AnalyticException {
  Entity concEntity = new Entity().setUuid(this.gen.next());
  CorefChain.CorefMention coreHeadMention = chain.getRepresentativeMention();
  // CoreNLP uses 1-based indexing for the sentences
  // just subtract 1.
  Tokenization tkz = tokenizations.get(coreHeadMention.sentNum - 1);
  UUID tkzUuid = tkz.getUuid();
  LOGGER.debug("Creating EntityMention based on tokenization: {}", tkzUuid.getUuidString());
  EntityMention concHeadMention = makeEntityMention(coreHeadMention, tkzUuid, true);
  TokenRefSequence trs = concHeadMention.getTokens();

  // TODO: below throws if they're invalid. maybe this can be removed in the future.
  this.validateTokenRefSeqValidity(trs, tkz);

  concEntity.setCanonicalName(coreHeadMention.mentionSpan);
  concEntity.addToMentionIdList(concHeadMention.getUuid());
  ems.addToMentionList(concHeadMention);
  for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
    if (mention == coreHeadMention)
      continue;
    // CoreNLP uses 1-based indexing for the sentences
    // we'll just subtract one.
    Tokenization localTkz = tokenizations.get(mention.sentNum - 1);
    EntityMention concMention = this.makeEntityMention(mention, localTkz.getUuid(), false);
    TokenRefSequence localTrs = concMention.getTokens();
    this.validateTokenRefSeqValidity(localTrs, localTkz);

    ems.addToMentionList(concMention);
    concEntity.addToMentionIdList(concMention.getUuid());
  }
  return concEntity;
}
 
Developer: hltcoe, Project: concrete-stanford-deprecated2, Lines: 33, Source: CorefManager.java

Example 6: makeEntityMention

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
private EntityMention makeEntityMention(CorefChain.CorefMention coreMention, UUID tokenizationUuid, boolean representative) throws AnalyticException {
  EntityMention concEntityMention = new EntityMention().setUuid(this.gen.next());
  TokenRefSequence trs = extractTokenRefSequence(coreMention, tokenizationUuid, representative);
  concEntityMention.setTokens(trs);
  concEntityMention.setText(coreMention.mentionSpan);
  // TODO: we could possibly add mention types. We could use a feature of
  // CoreNLP:
  // MentionType mentionType = coreMention.mentionType;
  // or we could use a heuristic (see concrete-agiga).
  // String emType = getEntityMentionType(em, tokenization);
  return concEntityMention;
}
 
Developer: hltcoe, Project: concrete-stanford-deprecated2, Lines: 13, Source: CorefManager.java

Example 7: cr

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Performs coreference resolution on the input text.<br>
 * The mention that appears first in a coreference chain is used as the referent (word or phrase).<br>
 * key: MD5(element + sentNum + startIndex + endIndex)
 *
 * @param graph the coreference chains
 *
 * @return
 */
private Map<String, CoreferenceElement> cr(Map<Integer, CorefChain> graph) {

    Map<String, CoreferenceElement> result = new HashMap<String, CoreferenceElement>();
    if (MapUtils.isEmpty(graph)) {
        return result;
    }

    Set<Map.Entry<Integer, CorefChain>> set = graph.entrySet();
    for (Iterator<Map.Entry<Integer, CorefChain>> it = set.iterator(); it.hasNext(); ) {

        Map.Entry<Integer, CorefChain> entry = it.next();

        if (entry.getValue().getMentionsInTextualOrder().size() > 1) {

            CorefMention firstElement = entry.getValue().getMentionsInTextualOrder().get(0);
            /* Use the first word (phrase) as the referent */
            CoreferenceElement ref = new CoreferenceElement(firstElement.mentionSpan, firstElement.corefClusterID, firstElement.startIndex, firstElement.endIndex, firstElement.sentNum, null);
            for (int k = 1; k < entry.getValue().getMentionsInTextualOrder().size(); k++) {

                CorefMention mention = entry.getValue().getMentionsInTextualOrder().get(k);

                if (mention != null) {
                    CoreferenceElement element = new CoreferenceElement(mention.mentionSpan, mention.corefClusterID, mention.startIndex, mention.endIndex, mention.sentNum, ref);
                    try {
                        result.put(Encipher.MD5(element.getElement() + element.getSentNum() + element.getStartIndex() + element.getEndIndex()), element);
                    } catch (NoSuchAlgorithmException | UnsupportedEncodingException e) {

                        this.log.error("MD5 encode error!", e);

                    }
                }

            }
        }
    }
    return result;
}
 
Developer: procyon-lotor, Project: event-direct-mts, Lines: 47, Source: EventsExtractBasedOnGraphV2.java

Example 8: extractEntities

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
private List<EntityMention> extractEntities(Map<Integer, CorefChain> graph) {
    List<EntityMention> mentions = new ArrayList<EntityMention>();
    
    for( Integer id : graph.keySet() ) {
      CorefChain chain = graph.get(id);
      for( CorefMention cmen : chain.getMentionsInTextualOrder() ) {
        EntityMention mention = new EntityMention(cmen.sentNum, cmen.mentionSpan, cmen.startIndex, cmen.endIndex-1, cmen.corefClusterID);
//        System.out.println(cmen + "\t->\t" + mention);
        mentions.add(mention);
      }
    }
    return mentions;
  }
 
Developer: nchambers, Project: schemas, Lines: 14, Source: CorefStanford.java

Example 9: toCoreferenceMap

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Transform an Annotation instance into a map of coreference clusters
 * 
 * @param annotation
 * @return
 */
public static Map<Integer, List<CorefMention>> toCoreferenceMap(Annotation annotation) {
	HashMap<Integer, List<CorefMention>> corefs = new HashMap<Integer, List<CorefMention>>();
	for (CorefChain chain : annotation.get(CorefChainAnnotation.class).values()) {
		CorefMention m1 = chain.getRepresentativeMention();
		corefs.put(m1.corefClusterID, new ArrayList<CorefMention>());
		corefs.get(m1.corefClusterID).add(m1);
		for (CorefMention m2 : chain.getMentionsInTextualOrder())
			if (m2 != m1)
				corefs.get(m2.corefClusterID).add(m2);
	}
	return corefs;
}
 
Developer: hakchul77, Project: irnlp_toolkit, Lines: 19, Source: StanfordNlpWrapper.java

Example 10: generatePronounEdges

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Returns some new rules learned about a pronoun given its match
 * context from anaphora resolution.
 * 
 * Specifically, we fill in the tags
 * 
 * _animate(main mention, ___).
 * _gender(main mention, ___).
 * _number(main mention, ___).
 * 
 * Basically, we can tell whether it is animate, its gender, and its number.
 * @return A list of semantic notes.
 */
public static List<Edge> generatePronounEdges(
		SemanticGraph g, IndexedWord w, Phrase t) {
	List<Edge> edges = new ArrayList<>();
	if (t.getUnpronoun().containsKey(w.index())) {
		// Use what we know about the pronoun
		Pair<CorefMention, CorefMention> mention_edge = t.getUnpronoun().get(w.index());
		String main_noun = Trees.concatNoun(g, g.getNodeByIndex(mention_edge.second.headIndex));
		
		Animacy is_animate = mention_edge.first.animacy;
		if (is_animate != Animacy.UNKNOWN) {
			edges.add(new Edge(
				main_noun, "_animate", is_animate.toString()));
		}
		
		Gender gender = mention_edge.first.gender;
		if (gender != Gender.UNKNOWN) {
			edges.add(new Edge(
				main_noun, "_gender", gender.toString()));
		}
		
		Dictionaries.Number number = mention_edge.first.number;
		if (number != Dictionaries.Number.UNKNOWN) {
			edges.add(new Edge(
				main_noun, "_number", number.toString()));
		}
	}
	return edges;
}
 
Developer: SeanTater, Project: uncc2014watsonsim, Lines: 42, Source: Edges.java

Example 11: getMainMention

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Get the full text of the main mention of a particular word, if it has a
 * better mention. Otherwise just get its segment of the tree using
 * concatNoun()
 * 
 * @param phrase
 * @param w
 * @return
 */
public static String getMainMention(
		Phrase phrase, SemanticGraph graph, IndexedWord word) {
	Pair<CorefMention, CorefMention> linked_refs =
			phrase.getUnpronoun().get(word.index());
	if (linked_refs == null) {
		return Trees.concatNoun(graph, word);
	} else {
		return linked_refs.second.mentionSpan;
	}
}
 
Developer: SeanTater, Project: uncc2014watsonsim, Lines: 20, Source: Edges.java

Example 12: getLinks

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
public static List<Pair<IntTuple, IntTuple>> getLinks(
    Map<Integer, CorefChain> result) {
  List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>();
  MentionComparator comparator = new MentionComparator();

  for(CorefChain c : result.values()) {
    List<CorefMention> s = c.getMentionsInTextualOrder();
    for(CorefMention m1 : s){
      for(CorefMention m2 : s){
        if(comparator.compare(m1, m2)==1) links.add(new Pair<IntTuple, IntTuple>(m1.position, m2.position));
      }
    }
  }
  return links;
}
 
Developer: benblamey, Project: stanford-nlp, Lines: 16, Source: SieveCoreferenceSystem.java

Example 13: addCorefMentions

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
/**
 * Adds annotations for coref mentions to the {@link Span}s in the {@link AnalysedText}
 * 
 * @param graph
 * @param at
 * @param sentences
 */
private void addCorefMentions(Map<Integer, CorefChain> graph, AnalysedText at, List<CoreMap> sentences) {
    for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
        CorefChain chain = entry.getValue();
        
        CorefMention reprMention = chain.getRepresentativeMention();
        List<CorefMention> mentions = chain.getMentionsInTextualOrder();
        
        /*
         * We don't care about chains with only 1 mention because those contain
         * only the representative mention without any other mention in the text.
         */
        if (mentions.size() < 2) {
            continue;
        }
        
        for (CorefMention mention : mentions) {
            Span mentionedSpan = getSpanFromMention(at, sentences, mention);
            Set<Span> mentionsAsSpans = new HashSet<Span>();
            boolean isRepresentative = mention.equals(reprMention);
            
            for (CorefMention otherMention : mentions) {
                if (!otherMention.equals(mention)) {
                    mentionsAsSpans.add(getSpanFromMention(at, sentences, otherMention));
                }
            }
            
            mentionedSpan.addAnnotation(COREF_ANNOTATION, 
                Value.value(new CorefFeature(isRepresentative, mentionsAsSpans)));
        }
    }
}
 
Developer: westei, Project: stanbol-stanfordnlp, Lines: 39, Source: StanfordNlpAnalyzer.java

Example 14: getdCoreferencedText

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
public static ArrayList<String> getdCoreferencedText(String text){
	Annotation document = new Annotation(text);
	pipeline.annotate(document);
	ArrayList<String> sentences = new ArrayList<String>();
	DocumentPreprocessor dp = new DocumentPreprocessor(
		new StringReader(text));
	ArrayList<List<HasWord>> processedText = new ArrayList<List<HasWord>>();
	for (List<HasWord> sentence : dp){
		processedText.add(sentence);
	}
	
	// replace each mention with its representative mention
	Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
	for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()){
		CorefChain c = entry.getValue();
		
		CorefMention cm = c.getRepresentativeMention();
		for (Entry<IntPair, Set<CorefMention>> e : 
			c.getMentionMap().entrySet()){
			if (cm.endIndex - cm.startIndex > 2){
				continue; // skip the replacement if the representative mention is longer than 2 words
			}
			for(CorefMention mention : e.getValue()){
				perClusterUpdateSen(processedText,
						mention.sentNum,cm.sentNum,
					cm.startIndex,cm.endIndex,
					mention.startIndex,mention.endIndex);
			}
		}
	}
	
	for (List<HasWord> senlist : processedText){
		sentences.add("");
		for (HasWord word:senlist){
			if (!word.toString().equals("")){
				//System.out.print(word.toString()+" ");
				String str = sentences.
						get(sentences.size()-1) + word.toString().toLowerCase()+" ";
				sentences.set(sentences.size()-1, str);
			}
		}
		
		//System.out.println();
	}
	for (int i=0; i < sentences.size(); i++){
		String s = sentences.get(i);
		sentences.set(i, (""+s.charAt(0)).toUpperCase() + s.substring(1)) ;
	}
	return sentences;
}
 
Developer: cs-zyluo, Project: CausalNet, Lines: 52, Source: Coreferencer.java

Example 15: resolveCoRef

import edu.stanford.nlp.dcoref.CorefChain.CorefMention; // import the required package/class
public String resolveCoRef(String text) {

		// to hold resolved string
		String resolved = new String();

		// run the pipeline
		Annotation document = runPipeline(text);

		// get all coref chains and sentences
		Map<Integer, CorefChain> corefs = document.get(CorefChainAnnotation.class);
		List<CoreMap> sentences = document.get(SentencesAnnotation.class);

		// process each sentence
		for (CoreMap sentence : sentences) {

			int curSentIdx = sentence.get(SentenceIndexAnnotation.class);
			List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);

			boolean isPronoun = false;
			for (CoreLabel token : tokens) {

				// process only pronouns
				isPronoun = false;
				String pos = token.get(PartOfSpeechAnnotation.class);
				if (pos.equals("PRP") || pos.equals("PRP$")) {
					isPronoun = true;
				}

				Integer corefClustId = token.get(CorefClusterIdAnnotation.class);
				CorefChain chain = corefs.get(corefClustId);

				// if there is no chain to replace
				if (chain == null || chain.getMentionsInTextualOrder().size() == 1 || isPronoun == false) {
					resolved += token.word() + token.after();
				} else {

					int sentIndx = chain.getRepresentativeMention().sentNum - 1;

					CorefMention reprMent = chain.getRepresentativeMention();
					String rootWord = sentences.get(sentIndx)
							.get(TokensAnnotation.class)
							.get(reprMent.headIndex - 1)
							.originalText();

					if (curSentIdx != sentIndx || token.index() < reprMent.startIndex
							|| token.index() > reprMent.endIndex) {
						if (Character.isUpperCase(token.originalText().charAt(0))) {
							rootWord = WordUtils.capitalize(rootWord);
						}
						resolved += rootWord + token.after();
					} else {
						resolved += token.word() + token.after();
					}
				}
			}
		}

		return resolved;
	}
 
Developer: sunil3590, Project: artificial-guy, Lines: 60, Source: NLP.java


Note: The examples of the edu.stanford.nlp.dcoref.CorefChain.CorefMention class in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.