Java CorefChainAnnotation Class Code Examples

This article collects typical usages of the Java class edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation. If you are wondering what CorefChainAnnotation is for, how to use it, or what real code that uses it looks like, the curated class examples below should help.


CorefChainAnnotation is a nested class of edu.stanford.nlp.dcoref.CorefCoreAnnotations. Thirteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
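Before the individual examples, here is a minimal, self-contained sketch of the usage pattern most of them share: build a pipeline whose annotator chain ends in dcoref, annotate a document, and read the chain map off it. The class name and sample sentence are illustrative only, not taken from any of the projects below.

import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefChainDemo {
    public static void main(String[] args) {
        // dcoref needs the full preprocessing chain in front of it
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("Barack Obama was born in Hawaii. He was the 44th president.");
        pipeline.annotate(document);

        // One CorefChain per entity, keyed by cluster id.
        // Sentence and token offsets inside each mention are 1-based.
        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            System.out.println(chain.getRepresentativeMention().mentionSpan
                    + "  <-  " + chain.getMentionsInTextualOrder());
        }
    }
}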

Example 1: _unpronoun

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
private static Map<Integer, Pair<CorefMention, CorefMention>> _unpronoun(Phrase p) {
	Stream<Pair<CorefMention, CorefMention>> s =
			Stream.of(p.memo(Phrase.coreNLP).get(CorefChainAnnotation.class))
		.filter(Objects::nonNull)  // Skip documents that have no coref annotation
		.flatMap(chains -> chains.entrySet().stream()) // Disassemble the map
		.flatMap(entry -> {
			// Link each entry to its main mention
			CorefMention main = entry.getValue().getRepresentativeMention();
			return entry.getValue().getMentionsInTextualOrder().stream()
				.filter(mention -> mention != main)
				.map(mention -> makePair(mention, main));
		});
	// Type inference chokes here, so bind to a local variable first, then return.
	return s.collect(HashMap::new,
			(m, pair) -> m.put(pair.first.headIndex, pair),
			(l, r) -> {});
}
 
Developer: SeanTater, Project: uncc2014watsonsim, Lines: 18, Source: Phrase.java
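One design note on the three-argument collect that ends this example: the empty combiner (l, r) -> {} silently discards one side's partial results if the stream is ever run in parallel, so the idiom is only correct for sequential streams, which is what Stream.of produces here. A parallel-safe alternative would be Collectors.toMap with a merge function.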

Example 2: test

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
@Test
public void test() throws Exception {
  String text = "Johns Hopkins University was started by Johns Hopkins. Johns Hopkins was a good man.";
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

  // create an empty Annotation just with the given text
  Annotation document = new Annotation(text);

  // run all Annotators on this text
  pipeline.annotate(document);

  // This is the coreference link graph
  // Each chain stores a set of mentions that link to each other,
  // along with a method for getting the most representative mention
  // Both sentence and token offsets start at 1!
  Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
  graph.entrySet().forEach(e -> {
    LOGGER.info("Got coref key: {}", e.getKey());
    LOGGER.info("Got coref val: {}", e.getValue());
    e.getValue().getMentionsInTextualOrder().forEach(m -> LOGGER.info("Got mention: {}", m.toString()));
  });

  LOGGER.info("Got document: {}", document);
  LOGGER.info("Got document: {}", document.toString());

  AnnotateNonTokenizedConcrete tk = new AnnotateNonTokenizedConcrete();
  StanfordPostNERCommunication postNER = tk.annotate(this.nytComm);
  postNER.getEntityMentions().forEach(em -> LOGGER.info("Got EM: {}", em));
}
 
Developer: hltcoe, Project: concrete-stanford-deprecated2, Lines: 32, Source: AnnotatedNYTTest.java

Example 3: getCorefInfo

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
List<Map<String, Object>> getCorefInfo(Annotation doc) {
		Map<Integer, CorefChain> corefChains = doc.get(CorefChainAnnotation.class);
//		List<CoreMap> sentences = doc.get(SentencesAnnotation.class);
		List<Map<String, Object>> entities = new ArrayList<>();
		for (CorefChain chain : corefChains.values()) {
			List<Map<String, Object>> mentions = new ArrayList<>();
			CorefChain.CorefMention representative = chain.getRepresentativeMention();
			for (CorefChain.CorefMention corement : chain.getMentionsInTextualOrder()) {
				Map<String, Object> outment = new HashMap<>();
				outment.put("sentence", corement.sentNum-1);
				outment.put("tokspan_in_sentence", Lists.newArrayList(
								corement.startIndex-1, corement.endIndex-1));
				outment.put("head", corement.headIndex-1);
				outment.put("gender", corement.gender.toString());
				outment.put("animacy", corement.animacy.toString());
				outment.put("number", corement.number.toString());
				outment.put("mentiontype", corement.mentionType.toString());
				outment.put("mentionid", corement.mentionID);
				if (representative!=null && corement.mentionID==representative.mentionID) {
					outment.put("representative", true);
				}
				mentions.add(outment);
			}
			Map<String, Object> entity = ImmutableMap.<String, Object>builder()
					.put("mentions", mentions)
					.put("entityid", chain.getChainID())
					.build();
			entities.add(entity);
		}
		return entities;
	}
 
Developer: UKPLab, Project: tac2015-event-detection, Lines: 32, Source: JsonPipeline.java

Example 4: parseDocumentWithCoref

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public CorenlpParsedArticle parseDocumentWithCoref(int sectionId, String text, Gson gson) {

		// l[]: article_name (String), section_id (int), text
		CorenlpParsedArticle pa = new CorenlpParsedArticle();
		pa.sectionId = sectionId;
		Annotation document = new Annotation(text);
		pipeline.annotate(document);
		List<CoreMap> sentences = document.get(SentencesAnnotation.class);
		pa.numSentence = sentences.size();
		for (int i = 0; i < sentences.size(); i++) {
			CoreMap sentence = sentences.get(i);
			ParsedSentence ps = new ParsedSentence(pa.sectionId, i, sentence, gsf);
			pa.parsedsentence.add(ps);
		}
		{
			Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
			// D.p(graph.size());
			for (Entry<Integer, CorefChain> e : graph.entrySet()) {
				int chainid = e.getKey();
				CorefChain cc = e.getValue();
				CorefResult cr = new CorefResult();
				pa.corefchains.add(cr);
				// for (CorefMention m : cc.getCorefMentions()) {
				// // for (CorefMention m : cc.getMentionsInTextualOrder()) {
				// cr.names.add(m.mentionSpan);
				// cr.chain.add(new int[] { m.sentNum - 1, m.startIndex - 1,
				// m.endIndex - 1 });
				// // D.p(m.toString(), cc.getChainID());
				// }
			}
			graph = null;
		}
		document = null;
		sentences = null;
		return pa;
	}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 37, Source: CoreNlpPipeline.java

Example 5: parseDocumentJsonWithCoref

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public String parseDocumentJsonWithCoref(int sectionId, String text, Gson gson) {

		// l[]: article_name (String), section_id (int), text
		CorenlpParsedArticle pa = new CorenlpParsedArticle();
		pa.sectionId = sectionId;
		Annotation document = new Annotation(text);
		pipeline.annotate(document);
		List<CoreMap> sentences = document.get(SentencesAnnotation.class);
		pa.numSentence = sentences.size();
		for (int i = 0; i < sentences.size(); i++) {
			CoreMap sentence = sentences.get(i);
			ParsedSentence ps = new ParsedSentence(pa.sectionId, i, sentence, gsf);
			pa.parsedsentence.add(ps);
		}
		String result = "";
		{
			Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
			// D.p(graph.size());
			for (Entry<Integer, CorefChain> e : graph.entrySet()) {
				int chainid = e.getKey();
				CorefChain cc = e.getValue();
				CorefResult cr = new CorefResult();
				pa.corefchains.add(cr);
				// for (CorefMention m : cc.getCorefMentions()) {
				// // for (CorefMention m : cc.getMentionsInTextualOrder()) {
				// cr.names.add(m.mentionSpan);
				// cr.chain.add(new int[] { m.sentNum - 1, m.startIndex - 1,
				// m.endIndex - 1 });
				// // D.p(m.toString(), cc.getChainID());
				// }
			}
			graph = null;
			result = gson.toJson(pa);

		}
		document = null;
		sentences = null;
		return result;
	}
 
Developer: zhangcongle, Project: NewsSpikeRe, Lines: 40, Source: CoreNlpPipeline.java

Example 6: toCoreferenceMap

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
 * Transform an Annotation instance into a map of coreference clusters.
 * 
 * @param annotation a document annotated by a pipeline that includes the dcoref annotator
 * @return a map from coreference cluster id to that cluster's mentions, with the representative mention first
 */
public static Map<Integer, List<CorefMention>> toCoreferenceMap(Annotation annotation) {
	HashMap<Integer, List<CorefMention>> corefs = new HashMap<Integer, List<CorefMention>>();
	for (CorefChain chain : annotation.get(CorefChainAnnotation.class).values()) {
		CorefMention m1 = chain.getRepresentativeMention();
		corefs.put(m1.corefClusterID, new ArrayList<CorefMention>());
		corefs.get(m1.corefClusterID).add(m1);
		for (CorefMention m2 : chain.getMentionsInTextualOrder())
			if (m2 != m1)
				corefs.get(m2.corefClusterID).add(m2);
	}
	return corefs;
}
 
Developer: hakchul77, Project: irnlp_toolkit, Lines: 19, Source: StanfordNlpWrapper.java
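A possible call site for this helper, assuming a pipeline that includes dcoref has already been constructed; the pipeline variable, document text, and static access through StanfordNlpWrapper (the source file named above) are illustrative assumptions:

// Hypothetical usage sketch, not taken from the project itself
Annotation document = new Annotation("Anna lost her keys. She found them later.");
pipeline.annotate(document); // pipeline must include the dcoref annotator

Map<Integer, List<CorefMention>> clusters = StanfordNlpWrapper.toCoreferenceMap(document);
clusters.forEach((clusterId, mentions) ->
        // toCoreferenceMap puts the representative mention first in each list
        System.out.println(clusterId + " -> " + mentions));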

Example 7: getdCoreferencedText

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static ArrayList<String> getdCoreferencedText(String text){
	Annotation document = new Annotation(text);
	pipeline.annotate(document);
	ArrayList<String> sentences = new ArrayList<String>();
	DocumentPreprocessor dp = new DocumentPreprocessor(
		new StringReader(text));
	ArrayList<List<HasWord>> processedText = new ArrayList<List<HasWord>>();
	for (List<HasWord> sentence : dp){
		processedText.add(sentence);
	}
	
	// replace every mention with its representative mention
	Map<Integer, CorefChain> graph =
			document.get(CorefChainAnnotation.class);
	for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()){
		CorefChain c = entry.getValue();
		
		CorefMention cm = c.getRepresentativeMention();
		for (Entry<IntPair, Set<CorefMention>> e : 
			c.getMentionMap().entrySet()){
			if (cm.endIndex - cm.startIndex > 2){
				continue; // skip replacement if the representative mention is longer than two words
			}
			for(CorefMention mention : e.getValue()){
				perClusterUpdateSen(processedText,
						mention.sentNum,cm.sentNum,
					cm.startIndex,cm.endIndex,
					mention.startIndex,mention.endIndex);
			}
		}
	}
	
	for (List<HasWord> senlist : processedText){
		sentences.add("");
		for (HasWord word:senlist){
			if (!word.toString().equals("")){
				//System.out.print(word.toString()+" ");
				String str = sentences.get(sentences.size()-1)
						+ word.toString().toLowerCase() + " ";
				sentences.set(sentences.size()-1, str);
			}
		}
		
		//System.out.println();
	}
	for (int i=0; i < sentences.size(); i++){
		String s = sentences.get(i);
		sentences.set(i, ("" + s.charAt(0)).toUpperCase() + s.substring(1));
	}
	return sentences;
}
 
Developer: cs-zyluo, Project: CausalNet, Lines: 52, Source: Coreferencer.java
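One caveat worth noting about this example: the coreference indices come from the pipeline's own tokenization of the document, while the sentences actually being rewritten come from a separate DocumentPreprocessor pass, so the replacement logic implicitly assumes both tokenizers segment the text identically.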

Example 8: resolveCoRef

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public String resolveCoRef(String text) {

		// to hold resolved string
		String resolved = new String();

		// run the pipeline
		Annotation document = runPipeline(text);

		// get all coref chains and sentences
		Map<Integer, CorefChain> corefs = document.get(CorefChainAnnotation.class);
		List<CoreMap> sentences = document.get(SentencesAnnotation.class);

		// process each sentence
		for (CoreMap sentence : sentences) {

			int curSentIdx = sentence.get(SentenceIndexAnnotation.class);
			List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);

			boolean isPronoun = false;
			for (CoreLabel token : tokens) {

				// process only pronouns (PRP = personal, PRP$ = possessive)
				isPronoun = false;
				String pos = token.get(PartOfSpeechAnnotation.class);
				if (pos.equals("PRP") || pos.equals("PRP$")) {
					isPronoun = true;
				}

				Integer corefClustId = token.get(CorefClusterIdAnnotation.class);
				CorefChain chain = corefs.get(corefClustId);

				// if there is no chain to replace
				if (chain == null || chain.getMentionsInTextualOrder().size() == 1 || isPronoun == false) {
					resolved += token.word() + token.after();
				} else {

					int sentIndx = chain.getRepresentativeMention().sentNum - 1;

					CorefMention reprMent = chain.getRepresentativeMention();
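					// CorefMention sentence/token indices are 1-based, while the
					// TokensAnnotation list is 0-based, hence the -1 offsets here.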
					String rootWord = sentences.get(sentIndx)
							.get(TokensAnnotation.class)
							.get(reprMent.headIndex - 1)
							.originalText();

					if (curSentIdx != sentIndx || token.index() < reprMent.startIndex
							|| token.index() > reprMent.endIndex) {
						if (Character.isUpperCase(token.originalText().charAt(0))) {
							rootWord = WordUtils.capitalize(rootWord);
						}
						resolved += rootWord + token.after();
					} else {
						resolved += token.word() + token.after();
					}
				}
			}
		}

		return resolved;
	}
 
Developer: sunil3590, Project: artificial-guy, Lines: 60, Source: NLP.java

Example 9: processParses

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
 * Start from parsed trees, and run the coref.
 */
public List<EntityMention> processParses(Collection<Tree> trees) {
  CoreLabelTokenFactory tokenfactory = new CoreLabelTokenFactory();
  List<EntityMention> entities = null;

  // Create an empty Annotation
  Annotation document = new Annotation("");

  try {
    // Setup the sentences using CoreMaps and CoreLabels.
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    for( Tree tree : trees ) {
      List<CoreLabel> sentence = new ArrayList<CoreLabel>();
      CoreMap sent = new ArrayCoreMap(1);
      sent.set(TokensAnnotation.class,sentence);
      sentences.add(sent);

      // Now add the leaves from the trees as separate tokens.
      List<String> strs = TreeOperator.stringLeavesFromTree(tree);
      List<String> pos = TreeOperator.posTagsFromTree(tree);
      int start = 0, index = 0;
      for( String str : strs ) {
        CoreLabel label = tokenfactory.makeToken(str, start, start+str.length());
        start += str.length() + 1;
        label.set(PartOfSpeechAnnotation.class, pos.get(index++));
        sentence.add(label);
      }

      // Now add the parse tree.
      sent.set(TreeAnnotation.class, tree);
    }
    // Add all sentences as an annotation to the document.
    document.set(CoreAnnotations.SentencesAnnotation.class, sentences);

    //    for( CoreMap sen : sentences ) {
    //      System.out.println(sen);
    //    }

    // NOTE: You can see each annotator get created in the StanfordCoreNLP.java class. 
    //       Look at its function getDefaultAnnotatorPool()
    pipeline.annotate(document);

    //    System.out.println("AFTER");
    //    for( CoreMap sen : sentences )
    //      System.out.println(sen);      

    // This is the coreference link graph
    // Each chain stores a set of mentions that link to each other,
    // along with a method for getting the most representative mention
    // Both sentence and token offsets start at 1!
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    //    for( Integer id : graph.keySet() ) System.out.println(id + "\t" + graph.get(id));
    entities = extractEntities(graph);
    
  } catch( Exception ex ) {
    System.out.println("--STANFORD COREF EXCEPTION-- Parses skipped...");
    ex.printStackTrace();
  }
    
  return entities;
}
 
Developer: nchambers, Project: schemas, Lines: 64, Source: CorefStanford.java

Example 10: runCoreNLP

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void runCoreNLP()
{
	Properties props = new Properties();
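	// NOTE: only tokenize and ssplit are enabled below, so the TreeAnnotation,
	// dependency, and CorefChainAnnotation lookups further down all return null.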
	props.put("annotators", "tokenize,ssplit");//, pos, lemma, ner");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

	// read some text in the text variable
	String text = "Hello how are you Ramesh"; // Add your text here!

			// create an empty Annotation just with the given text
	Annotation document = new Annotation(text);

	// run all Annotators on this text
	pipeline.annotate(document);

	// these are all the sentences in this document
	// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
	List<CoreMap> sentences = document.get(SentencesAnnotation.class);

	for(CoreMap sentence: sentences) {
		// traversing the words in the current sentence
		// a CoreLabel is a CoreMap with additional token-specific methods
		for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
			// this is the text of the token
			//String word = token.get(TextAnnotation.class);
			// this is the POS tag of the token
			//String pos = token.get(PartOfSpeechAnnotation.class);
			// this is the NER label of the token
			//String ne = token.get(NamedEntityTagAnnotation.class);
			//System.out.println(token+"/"+ne);
		}

		// this is the parse tree of the current sentence
		Tree tree = sentence.get(TreeAnnotation.class);

		// this is the Stanford dependency graph of the current sentence
		SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
	}

	// This is the coreference link graph
	// Each chain stores a set of mentions that link to each other,
	// along with a method for getting the most representative mention
	// Both sentence and token offsets start at 1!
	Map<Integer, CorefChain> graph = 
			document.get(CorefChainAnnotation.class);
}
 
Developer: siddBanPsu, Project: WikiKreator, Lines: 47, Source: StanfordCoreNLPRunner.java

Example 11: main

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void main(String[] args) {
 // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    
    // read some text in the text variable
    String text = "Ricardo Usbeck sits at his table. He is a researcher. entity is an extinct genus of sauropterygian type."; // Add your text here!
    
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);
    
    // run all Annotators on this text
    pipeline.annotate(document);
    
    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    
    for(CoreMap sentence: sentences) {
      // traversing the words in the current sentence
      // a CoreLabel is a CoreMap with additional token-specific methods
      for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
        // this is the text of the token
        String word = token.get(TextAnnotation.class);
        // this is the POS tag of the token
        String pos = token.get(PartOfSpeechAnnotation.class);
        // this is the NER label of the token
        String ne = token.get(NamedEntityTagAnnotation.class);
        System.out.println(word + "\t" + pos + "\t" + ne);
      }

      // this is the parse tree of the current sentence
      Tree tree = sentence.get(TreeAnnotation.class);
      System.out.println(tree.toString());

      // this is the Stanford dependency graph of the current sentence
      SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
      System.out.println(dependencies.toString());
    }

    // This is the coreference link graph
    // Each chain stores a set of mentions that link to each other,
    // along with a method for getting the most representative mention
    // Both sentence and token offsets start at 1!
    Map<Integer, CorefChain> graph = 
      document.get(CorefChainAnnotation.class);
    System.out.println(graph.toString());
}
 
Developer: dice-group, Project: Cetus, Lines: 50, Source: CorefTest.java

Example 12: process

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
public static void process() {
        // Chinese processing requires an extra properties file
        String[] args = new String[] {"-props", "StanfordCoreNLP-chinese.properties"
                //"edu/stanford/nlp/hcoref/properties/zh-coref-default.properties"  // Chinese coreference resolution
                };
        
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
        //Properties props = new Properties();
        // tokenize: tokenization; ssplit: sentence splitting; pos: POS tagging; lemma: lemmatization; parse: syntactic parsing (including dependencies); dcoref: coreference resolution; sentiment: sentiment analysis
        //props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment");
        Properties props = StringUtils.argsToProperties(args);
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // read some text in the text variable
        //String text = "Stanford University is located in California. It is a great university. Meg is the 2nd female CEO of HP! This is a great PC. This PC is not so good after all! 今天天气真好。"; // Add your text here!
        // "Some dealers in pirated goods have adopted more covert business practices to cope with and evade crackdowns."
        String text = "一些盗版制品经营者为了应付和躲避打击,经营手法更为隐蔽。";
        // approach 1
        Annotation document = pipeline.process(text);
        
        // approach 2
//        // create an empty Annotation just with the given text
//        Annotation document = new Annotation(text);
//        // run all Annotators on this text
//        pipeline.annotate(document);

        // these are all the sentences in this document
        // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
        List<CoreMap> sentences = document.get(SentencesAnnotation.class);

        // iterate over all sentences and print the analysis result for each
        for (CoreMap sentence : sentences) {
            System.out.println("--------------------- Begin of one sentence--------------------------");
            System.out.println(sentence.toString());
            
            // traversing the words in the current sentence
            // a CoreLabel is a CoreMap with additional token-specific methods
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // TODO what other annotation classes are there?
                // this is the text of the token (the segmented word)
                String word = token.get(TextAnnotation.class);
                // this is the POS tag of the token, e.g. VBZ (verb), NNP (proper noun), JJ (adjective), DT (determiner), ...
                String pos = token.get(PartOfSpeechAnnotation.class);
                // this is the NER label of the token, e.g. ORGANIZATION, LOCATION, DATE, TIME, PERSON, or O,
                // used to recognize named entities such as persons, places, and organizations in the text
                String ne = token.get(NamedEntityTagAnnotation.class);
                System.out.println("word = " + word + " pos = " + pos + " ne = " + ne);
            }
//            // this is the parse tree of the current sentence
//            Tree treeAnnotation = sentence.get(TreeAnnotation.class);
//            System.out.println("TreeAnnotation = " + treeAnnotation);
//            // this is the Stanford dependency graph of the current sentence
//            SemanticGraph dependencies = sentence
//                    .get(CollapsedCCProcessedDependenciesAnnotation.class);
            
            // sentiment analysis (not supported for Chinese; with the Chinese pipeline above this annotation will be absent)
            Tree treeSentiment = sentence
                    .get(SentimentAnnotatedTree.class);
            int sentiment = RNNCoreAnnotations.getPredictedClass(treeSentiment);
            System.out.println(printSentiment(sentiment));
            System.out.println("--------------------- End of one sentence--------------------------");
        }

        // This is the coreference link graph
        // Each chain stores a set of mentions that link to each other,
        // along with a method for getting the most representative mention
        // Both sentence and token offsets start at 1!
        // the coreference map links mentions that refer to the same entity
        Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
        System.out.println(graph);
    }
 
Developer: Terry-Shi, Project: corenlp_tutorial, Lines: 69, Source: CoreNLPMain.java

Example 13: testPipeline

import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation; // import the required package/class
/**
 * Test from http://nlp.stanford.edu/software/corenlp.shtml
 */
@Test
public void testPipeline() {

	// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 
	Properties props = new Properties();
	props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
	StanfordCoreNLP pipeline = new StanfordCoreNLP(props);



	// create an empty Annotation just with the given text
	Annotation document = new Annotation(text);

	// run all Annotators on this text
	pipeline.annotate(document);

	// these are all the sentences in this document
	// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
	List<CoreMap> sentences = document.get(SentencesAnnotation.class);

	StringBuffer sentenceStringBuffer = new StringBuffer();
	for(CoreMap sentence: sentences) {
		// traversing the words in the current sentence
		// a CoreLabel is a CoreMap with additional token-specific methods
		for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
			// this is the text of the token
			String word = token.get(TextAnnotation.class);
			// this is the POS tag of the token
			String pos = token.get(PartOfSpeechAnnotation.class);
			// this is the NER label of the token
			String ne = token.get(NamedEntityTagAnnotation.class);    
			sentenceStringBuffer.append(word);
			sentenceStringBuffer.append("/");
			sentenceStringBuffer.append(pos);
			sentenceStringBuffer.append("/");
			sentenceStringBuffer.append(ne);
			sentenceStringBuffer.append(" ");
		}
		System.out.println(sentenceStringBuffer.toString());
		sentenceStringBuffer = new StringBuffer();

		// this is the parse tree of the current sentence
		Tree tree = sentence.get(TreeAnnotation.class);

		// this is the Stanford dependency graph of the current sentence
		SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
	}

	// This is the coreference link graph
	// Each chain stores a set of mentions that link to each other,
	// along with a method for getting the most representative mention
	// Both sentence and token offsets start at 1!
	Map<Integer, CorefChain> graph = 
			document.get(CorefChainAnnotation.class);
}
 
Developer: nicolashernandez, Project: dev-star, Lines: 59, Source: TestStanford.java


Note: the edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with those authors; for redistribution and use, please refer to each project's license. Do not reproduce this article without permission.