本文整理汇总了Java中edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation类的典型用法代码示例。如果您正苦于以下问题:Java TextAnnotation类的具体用法?Java TextAnnotation怎么用?Java TextAnnotation使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
TextAnnotation类属于edu.stanford.nlp.ling.CoreAnnotations包,在下文中一共展示了TextAnnotation类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
public static void main(String[] args) {
    // Load the custom properties file configuring the Chinese models.
    StanfordCoreNLP pipeline = new StanfordCoreNLP("CoreNLP-chinese.properties");
    // Run the full pipeline over a short Chinese sentence.
    Annotation annotation = pipeline.process("我爱北京天安门");
    // Fetch the sentence list and inspect only the first sentence.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap firstSentence = sentences.get(0);
    // Print each token's surface form, one per line.
    List<CoreLabel> tokens = firstSentence.get(CoreAnnotations.TokensAnnotation.class);
    System.out.println("字/词");
    System.out.println("-----------------------------");
    for (CoreLabel token : tokens) {
        String word = token.getString(TextAnnotation.class);
        // String pos = token.getString(PartOfSpeechAnnotation.class);
        // String ner = token.getString(NamedEntityTagAnnotation.class);
        System.out.println(word);
    }
}
示例2: TokenizedCoreLabelWrapper
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Wraps a Stanford {@link CoreLabel}, copying its commonly used token-level
 * annotations into this wrapper's fields for easier downstream access.
 *
 * @param cl the CoreLabel produced by tokenization; annotations absent on the
 *           label end up as null (or an empty Optional for token offsets)
 */
public TokenizedCoreLabelWrapper(final CoreLabel cl) {
// Surface forms of the token.
this.value = cl.get(ValueAnnotation.class);
this.text = cl.get(TextAnnotation.class);
LOGGER.trace("Wrapping token text: {}", this.text);
this.originalText = cl.get(OriginalTextAnnotation.class);
// Text immediately before/after the token (typically whitespace).
this.before = cl.get(BeforeAnnotation.class);
this.after = cl.get(AfterAnnotation.class);
// NOTE(review): the field names suggest sentence-relative character offsets,
// but CharacterOffsetBegin/End annotations are usually document-relative —
// confirm against the producer of these CoreLabels.
this.startSentenceOffset = cl.get(CharacterOffsetBeginAnnotation.class);
this.endSentenceOffset = cl.get(CharacterOffsetEndAnnotation.class);
// Token-index offsets may be missing, hence the Optional wrapping.
this.startOffset = Optional.ofNullable(cl.get(TokenBeginAnnotation.class));
this.endOffset = Optional.ofNullable(cl.get(TokenEndAnnotation.class));
LOGGER.trace("TokenBegin: {}", this.startOffset);
LOGGER.trace("TokenEnd: {}", this.endOffset);
// Index of the token within its sentence, and the sentence's own index.
this.idx = cl.get(IndexAnnotation.class);
this.sentenceIdx = cl.get(SentenceIndexAnnotation.class);
LOGGER.trace("Got sentence idx: {}", this.sentenceIdx);
}
示例3: test
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/** Exercises the tokens/sentence analytic and logs the resulting annotation. */
@Test
public void test() throws Exception {
    ConcreteStanfordTokensSentenceAnalytic analytic = new ConcreteStanfordTokensSentenceAnalytic();
    TokenizedCommunication tokenizedComm = analytic.annotate(this.comm);
    final List<CoreMap> coreMaps = new ArrayList<>();
    // Convert every section of the communication into Stanford CoreMaps.
    tokenizedComm.getSections().forEach(section -> {
        LOGGER.debug("Annotation section: {}", section.getUuid().getUuidString());
        coreMaps.addAll(ConcreteToStanfordMapper.concreteSectionToCoreMapList(section, this.txt));
    });
    // Assemble a document-level annotation carrying the full text.
    Annotation annotation = new Annotation(coreMaps);
    annotation.set(TextAnnotation.class, this.txt);
    LOGGER.info(annotation.toShorterString(new String[0]));
    for (CoreMap coreMap : coreMaps) {
        LOGGER.info("Got CoreMap: {}", coreMap.toShorterString(new String[0]));
    }
}
示例4: parsingTest
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo: labels cyber entities in a sample sentence, then prints each token
 * with its label followed by the sentence's entity mentions and parse tree.
 */
private static void parsingTest()
{
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    EntityLabeler labeler = new EntityLabeler();
    Annotation annotatedDoc = labeler.getAnnotatedDoc("My Doc", exampleText);
    for (CoreMap sentence : annotatedDoc.get(SentencesAnnotation.class))
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            // One line per token: surface form, tab, cyber-entity label.
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentence.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentence.get(TreeAnnotation.class));
    }
}
示例5: getExampleTextFromSerGz
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Deserializes an annotated document from the given file and rebuilds its
 * text, one line per sentence with tokens separated by single spaces.
 *
 * Uses a StringBuilder instead of the original per-token String
 * concatenation (which was O(n^2) in the token count). The original's exact
 * semantics are preserved: after each sentence the accumulated buffer is
 * trimmed as a whole and a newline appended.
 *
 * @param f serialized annotated document (path passed to the deserializer)
 * @return reconstructed text, newline-terminated per sentence
 */
private static String getExampleTextFromSerGz(File f)
{
    StringBuilder result = new StringBuilder();
    Annotation deserDoc = EntityLabeler.deserializeAnnotatedDoc(f.getAbsolutePath());
    List<CoreMap> sentences = deserDoc.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences)
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            // Separator first, exactly like the original " " + token concatenation.
            result.append(' ').append(token.get(TextAnnotation.class));
        }
        // Preserve original behavior: trim the whole buffer, then add a newline.
        String trimmed = result.toString().trim();
        result.setLength(0);
        result.append(trimmed).append('\n');
    }
    return result.toString();
}
示例6: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/** Smoke test: runs the pipeline over pre-tokenized, one-sentence-per-line input. */
public static void main(String[] args) {
    // Treat the input as already tokenized, one sentence per line.
    X.prop.put("tokenized", "true");
    X.prop.put("singleSentences", "true");
    initPipeline(true, true);
    Annotation annotation = new Annotation(
            "BOSTON 69 65 .515 5 1/2\n1. Michelle Freeman ( Jamaica ) 12.71 seconds");
    pipeline.annotate(annotation);
    // Log each token of each sentence, marking sentence boundaries.
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            logger.info(token.get(TextAnnotation.class));
        }
        logger.info("sentence done");
    }
}
示例7: posTagLineToArray
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * POS-tags a sentence and returns an array of (POS-tag, word) pairs.
 *
 * Fixes the raw-type {@code new fig.basic.Pair(...)} construction with the
 * diamond operator; only the unavoidable generic-array creation at the end
 * still needs an unchecked suppression.
 *
 * @param line the input text (all sentences found in it are tagged)
 * @return pairs of (part-of-speech tag, token text), in token order
 */
@SuppressWarnings("unchecked")
public static fig.basic.Pair<String, String>[] posTagLineToArray(String line)
{
    Annotation document = new Annotation(line);
    pipeline.annotate(document);
    List<fig.basic.Pair<String, String>> out = new ArrayList<>();
    for (CoreMap sentence : document.get(SentencesAnnotation.class))
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            out.add(new fig.basic.Pair<>(token.get(PartOfSpeechAnnotation.class),
                    token.get(TextAnnotation.class)));
        }
    }
    // Generic array creation is impossible; the raw-typed seed array is required.
    return out.toArray(new fig.basic.Pair[0]);
}
示例8: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: labels cyber entities in a sample sentence and prints
 * each token with its label, then the entity mentions and parse tree.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    Annotation annotatedDoc = new EntityLabeler().getAnnotatedDoc("My Doc", exampleText);
    List<CoreMap> sentenceMaps = annotatedDoc.get(SentencesAnnotation.class);
    for (CoreMap sentenceMap : sentenceMaps) {
        for (CoreLabel token : sentenceMap.get(TokensAnnotation.class)) {
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentenceMap.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentenceMap.get(TreeAnnotation.class));
    }
}
示例9: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: labels cyber entities in a test sentence, prints each
 * token with its label, then extracts relations into a subgraph for "CNN".
 *
 * @param args unused
 */
public static void main(String[] args) {
    String testSentence = "Microsoft Windows XP before 2.8 has cross-site scripting vulnerability in file.php (refer to CVE-2014-1234).";
    Annotation annotatedDoc = new EntityLabeler().getAnnotatedDoc("My Doc", testSentence);
    for (CoreMap sentence : annotatedDoc.get(SentencesAnnotation.class)) {
        // Print each token alongside its cyber-entity label.
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            System.out.println(token.get(TextAnnotation.class) + "\t\t" + token.get(CyberAnnotation.class));
        }
        System.out.println();
    }
    RelationExtractor extractor = new RelationExtractor("src/main/resources/patterns_relations_abbrev.json");
    System.out.println(extractor.createSubgraph(annotatedDoc, "CNN"));
}
示例10: annotate
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Adds a NormalizerAnnotation to every token of every sentence in the given
 * annotation, derived from each token's text and lemma via {@code addLemma}.
 *
 * Replaces the index-based token loop (index was unused beyond {@code get})
 * with an enhanced for loop, and braces the trailing {@code if}.
 *
 * @param annotation a document annotation that must already contain
 *                   sentences and tokens (lemmas expected per token)
 * @throws RuntimeException if the annotation has no SentencesAnnotation
 */
@Override
public void annotate(Annotation annotation) {
    if (verbose) {
        timer.start();
        System.err.print("Adding normalized token annotation...");
    }
    if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                String text = token.get(TextAnnotation.class);
                String lemma = token.get(LemmaAnnotation.class);
                this.addLemma(NormalizerAnnotation.class, token, lemma, text);
            }
        }
    } else {
        throw new RuntimeException("unable to find words/tokens in: " + annotation);
    }
    if (verbose) {
        timer.stop("done.");
    }
}
示例11: adjustCharacterOffsets
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Recomputes CharacterOffsetBegin/End for every token by accumulating the
 * lengths of the tokens' OriginalTextAnnotation values, restarting the count
 * at zero for each sentence.
 *
 * Cleanups: removed the no-op {@code sentenceTokens.set(i, token)} (same
 * reference written back) and the misleading {@code sentencesCopy} alias —
 * the input list was never copied; it is mutated in place and returned.
 *
 * @param sentences sentences whose tokens are mutated in place
 * @param setOriginalText if true, each token's OriginalTextAnnotation is
 *        first rebuilt as its text plus a trailing space (so offsets include
 *        a one-character separator)
 * @return the same (mutated) list that was passed in
 */
public static List<CoreMap> adjustCharacterOffsets(List<CoreMap> sentences, boolean setOriginalText)
{
    for (CoreMap sentence : sentences)
    {
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int characterCount = 0;
        for (CoreLabel token : sentenceTokens)
        {
            if (setOriginalText)
            {
                token.set(CoreAnnotations.OriginalTextAnnotation.class, token.get(CoreAnnotations.TextAnnotation.class) + " ");
            }
            // NOTE(review): when setOriginalText is false this NPEs if a token
            // lacks OriginalTextAnnotation — confirm callers guarantee it.
            int startCharacterCount = characterCount;
            int endCharacterCount = startCharacterCount + token.get(CoreAnnotations.OriginalTextAnnotation.class).length();
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, startCharacterCount);
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endCharacterCount);
            characterCount = endCharacterCount;
        }
        // Tokens were mutated in place; re-setting the list preserves the
        // original implementation's behavior in case set() has side effects.
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    }
    return sentences;
}
示例12: makeVertex
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Builds an IndexedWord vertex from a string token, optionally carrying an
 * embedded index (parsed by {@code readWordAndIndex}) and/or a "/POS-tag"
 * suffix. Tokens without an index marker get the next free index.
 */
private IndexedWord makeVertex(String word) {
Integer index; // initialized below
Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
if (wordAndIndex != null) {
// The token carried an explicit index marker; strip it from the word.
word = wordAndIndex.first();
index = wordAndIndex.second();
} else {
index = getNextFreeIndex();
}
indexesUsed.add(index);
// Note that, despite the use of indexesUsed and getNextFreeIndex(),
// nothing is actually enforcing that no indexes are used twice. This
// could occur if some words in the string representation being parsed
// come with index markers and some do not.
IndexedWord ifl = new IndexedWord(null, 0, index);
// System.err.println("SemanticGraphParsingTask>>> word = " + word);
// System.err.println("SemanticGraphParsingTask>>> index = " + index);
// System.err.println("SemanticGraphParsingTask>>> indexesUsed = " +
// indexesUsed);
// NOTE(review): split("/") breaks on every slash, so a word that itself
// contains "/" loses everything after its first slash — confirm intended.
String[] wordAndTag = word.split("/");
ifl.set(TextAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(PartOfSpeechAnnotation.class, wordAndTag[1]);
return ifl;
}
示例13: sentenceSplitter
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Converts each sentence into a single string of its token texts separated
 * by spaces.
 *
 * Uses a StringBuilder instead of the original O(n^2) per-token String
 * concatenation. Behavior is unchanged: each sentence string keeps its
 * trailing space, exactly as before.
 *
 * @param sentences sentences carrying TokensAnnotation
 * @return one space-joined string per sentence (trailing space included)
 */
public static List<String> sentenceSplitter(List<CoreMap> sentences) {
    List<String> sentenceList = new ArrayList<>();
    for (CoreMap sentence : sentences) {
        StringBuilder sentenceText = new StringBuilder();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            sentenceText.append(token.get(TextAnnotation.class)).append(' ');
        }
        sentenceList.add(sentenceText.toString());
    }
    return sentenceList;
}
示例14: ResolvePronoun
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Runs coreference resolution over the given text and maps every mention to
 * the surface text of its chain's representative mention.
 *
 * Cleanups: the representative-mention text is now built with a
 * StringBuilder (was O(n^2) String concatenation) and locals follow
 * lowerCamelCase; the public signature is unchanged.
 *
 * @param Text the document text to annotate
 * @return map from (0-based sentence index, 0-based token start index) of
 *         each mention to its chain's representative-mention text
 * @throws Exception propagated from the annotation pipeline
 */
public static Map<Pair<Integer,Integer>,String> ResolvePronoun(String Text) throws Exception {
    // Annotate the document so coreference chains are available.
    Annotation document = new Annotation(Text);
    processMain.pipeline.annotate(document);
    Map<Integer,CorefChain> graph = document.get(CorefChainAnnotation.class);
    Map<Pair<Integer,Integer>,String> answer = new HashMap<Pair<Integer,Integer>,String>();
    for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
        CorefChain chain = entry.getValue();
        // Chains with a single mention have nothing to resolve.
        if (chain.getMentionsInTextualOrder().size() <= 1) {
            continue;
        }
        CorefMention representative = chain.getRepresentativeMention();
        // Rebuild the representative mention's surface text from its tokens
        // (sentNum and token indices are 1-based in the coref API).
        List<CoreLabel> tokens = document.get(SentencesAnnotation.class)
                .get(representative.sentNum - 1).get(TokensAnnotation.class);
        StringBuilder mentionText = new StringBuilder();
        for (int i = representative.startIndex - 1; i < representative.endIndex - 1; i++) {
            mentionText.append(tokens.get(i).get(TextAnnotation.class)).append(' ');
        }
        String clust = mentionText.toString().trim();
        // Map every mention of the chain to the representative's text.
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            answer.put(new Pair<Integer,Integer>(mention.sentNum - 1, mention.startIndex - 1), clust);
        }
    }
    return answer;
}
示例15: concreteSectionToCoreMapList
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Converts a Concrete Section into Stanford CoreMaps, one per sentence,
 * carrying the sentence index, character offsets, raw text, tokens, and
 * running token offsets within the section.
 *
 * @param sect the Concrete section to convert
 * @param commText the full communication text; each sentence's text is the
 *                 substring covered by its TextSpan
 * @return one CoreMap per sentence, in document order
 */
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) {
    List<CoreMap> coreMaps = new ArrayList<>();
    List<Sentence> sentences = sect.getSentenceList();
    int tokenOffset = 0;
    for (int idx = 0; idx < sentences.size(); idx++) {
        Sentence sentence = sentences.get(idx);
        CoreMap stanfordSentence = new ArrayCoreMap();
        stanfordSentence.set(SentenceIndexAnnotation.class, idx);
        // Character span of this sentence within the communication text.
        final TextSpan span = sentence.getTextSpan();
        final int charStart = span.getStart();
        final int charEnd = span.getEnding();
        LOGGER.debug("Setting stanford sentence BeginChar = {}", charStart);
        stanfordSentence.set(CharacterOffsetBeginAnnotation.class, charStart);
        LOGGER.debug("Setting stanford sentence EndChar = {}", charEnd);
        stanfordSentence.set(CharacterOffsetEndAnnotation.class, charEnd);
        String sentenceText = commText.substring(charStart, charEnd);
        LOGGER.debug("Setting text: {}", sentenceText);
        stanfordSentence.set(TextAnnotation.class, sentenceText);
        // Convert the Concrete tokenization into Stanford CoreLabels.
        Tokenization tokenization = sentence.getTokenization();
        List<CoreLabel> tokens = tokenizationToCoreLabelList(tokenization, idx, charStart);
        LOGGER.debug("Setting stanford sentence token begin: {}", tokenOffset);
        stanfordSentence.set(TokenBeginAnnotation.class, tokenOffset);
        final int tokenEnd = tokenOffset + tokens.size();
        LOGGER.debug("Setting stanford sentence token end: {}", tokenEnd);
        stanfordSentence.set(TokenEndAnnotation.class, tokenEnd);
        stanfordSentence.set(TokensAnnotation.class, tokens);
        // Token offsets run continuously across sentences of the section.
        tokenOffset = tokenEnd;
        coreMaps.add(stanfordSentence);
    }
    return coreMaps;
}