This article collects typical usages of the Java class edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation. If you have been wondering what TokensAnnotation is for and how to use it, the curated class examples below may help.
The TokensAnnotation class belongs to the edu.stanford.nlp.ling.CoreAnnotations package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
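Most of the examples below call annotate on a pipeline (or parser) field that is initialized elsewhere in the enclosing class. A minimal sketch of that setup, assuming the standard StanfordCoreNLP constructor and the tokenize/ssplit/pos/lemma annotator chain that the LemmaAnnotation lookups require (the class name LemmatizerSetup is hypothetical):

import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class LemmatizerSetup {
    private final StanfordCoreNLP pipeline;

    public LemmatizerSetup() {
        Properties props = new Properties();
        // tokenize -> ssplit -> pos -> lemma is the minimal annotator
        // chain that populates TokensAnnotation and LemmaAnnotation
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
        this.pipeline = new StanfordCoreNLP(props);
    }
}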
Example 1: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<List<String>> lemmatize(String documentText)
{
    List<List<String>> lemmas = new ArrayList<List<String>>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        List<String> sentenceLemmas = new ArrayList<String>();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the
            // list of lemmas
            sentenceLemmas.add(token.get(LemmaAnnotation.class));
        }
        lemmas.add(sentenceLemmas);
    }
    return lemmas;
}
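A hypothetical call against the method above (the exact lemmas depend on the loaded models; note that punctuation tokens are lemmatized to themselves):

// one inner list per sentence, e.g.
// [[the, dog, run, quickly, .], [it, be, fast, .]]
List<List<String>> lemmas = lemmatize("The dogs ran quickly. It was fast.");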
Example 2: tagAndTokenize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
    List<String> tags = new ArrayList<String>();
    List<String> tokens = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve the POS tag and the surface token for each word
            tags.add(token.get(PartOfSpeechAnnotation.class));
            tokens.add(token.word());
        }
    }
    return new Pair<List<String>, List<String>>(tags, tokens);
}
Example 3: tag
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<String> tag(String documentText)
{
    List<String> tags = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the POS tag for each word
            tags.add(token.get(PartOfSpeechAnnotation.class));
        }
    }
    return tags;
}
Example 4: traffer
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static String traffer(String word) {
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(word);
    // run all Annotators on this text
    stanfordCoreNLP.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    if (lemmas.size() != 1) {
        System.out.println("bug!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
    }
    return lemmas.get(0);
}
Example 5: ExtractPosTagsFile
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsFile(File filePath) throws Exception {
    List<String> lstData = ExtractData(filePath);
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : lstData)
    {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : sentences)
        {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            });
        }
    }
    return lstTaggedSentences;
}
Example 6: ExtractPosTags
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTags(List<String> inputData)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : inputData)
    {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : sentences)
        {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            });
        }
    }
    return lstTaggedSentences;
}
Example 7: ExtractPosTagsSentence
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsSentence(String sentence)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation = new Annotation(sentence);
    pipeline.annotate(annotation);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap map : sentences)
    {
        map.get(TokensAnnotation.class).stream().forEach((tok) -> {
            String posTag = tok.get(PartOfSpeechAnnotation.class);
            lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
        });
    }
    return lstTaggedSentences;
}
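Examples 5 through 7 build a fresh StanfordCoreNLP for every call, which reloads the POS tagger model each time. A sketch of hoisting the pipeline into a field that is built once and reused, assuming the same annotator configuration (the class name PosTaggerSetup is illustrative):

import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class PosTaggerSetup {
    // Model loading is expensive, so construct the pipeline once
    // and share it across all ExtractPosTag* calls.
    private static final StanfordCoreNLP PIPELINE;

    static {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos");
        PIPELINE = new StanfordCoreNLP(props);
    }
}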
Example 8: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
/**
 * Takes a string and returns a list of lemmas.
 * @param documentText the raw text to lemmatize
 * @return the lemma of each token, in document order
 */
public List<String> lemmatize(String documentText)
{
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 9: parsingTest
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
private static void parsingTest()
{
    // String exampleText = "The software developer who inserted a major security flaw into OpenSSL 1.2.4.8, using the file foo/bar/blah.php has said the error was \"quite trivial\" despite the severity of its impact, according to a new report. The Sydney Morning Herald published an interview today with Robin Seggelmann, who added the flawed code to OpenSSL, the world's most popular library for implementing HTTPS encryption in websites, e-mail servers, and applications. The flaw can expose user passwords and potentially the private key used in a website's cryptographic certificate (whether private keys are at risk is still being determined). This is a new paragraph about Apache Tomcat's latest update 7.0.1.";
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    // String exampleText = "Oracle DBRM has vulnerability in ABCD plug-in via abcd.1234 (found on abcd.com).";
    EntityLabeler labeler = new EntityLabeler();
    Annotation doc = labeler.getAnnotatedDoc("My Doc", exampleText);
    List<CoreMap> sentences = doc.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences)
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentence.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentence.get(TreeAnnotation.class));
    }
}
Example 10: getExampleTextFromSerGz
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
private static String getExampleTextFromSerGz(File f)
{
    String result = "";
    Annotation deserDoc = EntityLabeler.deserializeAnnotatedDoc(f.getAbsolutePath());
    List<CoreMap> sentences = deserDoc.get(SentencesAnnotation.class);
    for (int sentencenum = 0; sentencenum < sentences.size(); sentencenum++)
    {
        CoreMap sentence = sentences.get(sentencenum);
        List<CoreLabel> labels = sentence.get(TokensAnnotation.class);
        for (int i = 0; i < labels.size(); i++)
        {
            CoreLabel token = labels.get(i);
            String tokenstring = token.get(TextAnnotation.class);
            result += " " + tokenstring;
        }
        result = result.trim() + "\n";
    }
    return result;
}
Example 11: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static List<String> lemmatize(String documentText) {
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 12: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<String> lemmatize(String documentText) {
    List<String> lemmas = new LinkedList<String>();
    // Create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the
            // list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 13: main
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void main(String[] args) {
    // String parse = "\nasfd\n\ndaf";
    // String[] lines = parse.split("\n");
    // System.out.println(lines.length);
    // System.exit(0);
    X.prop.put("tokenized", "true");
    X.prop.put("singleSentences", "true");
    initPipeline(true, true);
    Annotation ann = new Annotation(
            "BOSTON 69 65 .515 5 1/2\n1. Michelle Freeman ( Jamaica ) 12.71 seconds");
    pipeline.annotate(ann);
    for (CoreMap sent : ann.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sent.get(TokensAnnotation.class)) {
            logger.info(token.get(TextAnnotation.class));
        }
        logger.info("sentence done");
    }
}
Example 14: parse
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void parse(FigerSystem sys, int lineId, String text) {
    Annotation annotation = new Annotation(text);
    Preprocessing.pipeline.annotate(annotation);
    // for each sentence
    int sentId = 0;
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        // System.out.println("[l" + i + "][s"
        // + sentId + "]tokenized sentence="
        // + StringUtils.joinWithOriginalWhiteSpace(sentence
        // .get(TokensAnnotation.class)));
        List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
        for (Pair<Integer, Integer> offset : entityMentionOffsets) {
            String label = sys.predict(annotation, sentId,
                    offset.first, offset.second);
            String mention = StringUtils.joinWithOriginalWhiteSpace(sentence.get(
                    TokensAnnotation.class).subList(offset.first, offset.second));
            System.out.println("[l" + lineId + "][s" + sentId + "]mention"
                    + mention + "(" + offset.first + ","
                    + offset.second + ") = " + mention + ", pred = "
                    + label);
        }
        sentId++;
    }
}
Example 15: parse
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void parse(ParseStanfordFigerReverb sys, int lineId, String text) {
    Annotation annotation = new Annotation(text);
    Preprocessing.pipeline.annotate(annotation);
    // for each sentence
    int sentId = 0;
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        // System.out.println("[l" + i + "][s"
        // + sentId + "]tokenized sentence="
        // + StringUtils.joinWithOriginalWhiteSpace(sentence
        // .get(TokensAnnotation.class)));
        List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
        for (Pair<Integer, Integer> offset : entityMentionOffsets) {
            String label = sys.predict(annotation, sentId, offset.first, offset.second);
            String mention = StringUtils.joinWithOriginalWhiteSpace(
                    sentence.get(TokensAnnotation.class).subList(offset.first, offset.second));
            System.out.println("[l" + lineId + "][s" + sentId + "]mention" + mention + "(" + offset.first + ","
                    + offset.second + ") = " + mention + ", pred = " + label);
        }
        sentId++;
    }
}