This article collects typical usage examples of the Java class edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation. If you are wondering what TokensAnnotation does, or how and where to use it, the curated examples below should help.
The TokensAnnotation class belongs to the edu.stanford.nlp.ling.CoreAnnotations package. Fifteen code examples of the class are shown below, ordered by popularity.
Example 1: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<List<String>> lemmatize(String documentText)
{
    List<List<String>> lemmas = new ArrayList<List<String>>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        List<String> sentence_lemmas = new ArrayList<String>();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the
            // list of lemmas
            sentence_lemmas.add(token.get(LemmaAnnotation.class));
        }
        lemmas.add(sentence_lemmas);
    }
    return lemmas;
}
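Most of the examples on this page call annotate on a pre-built field (this.parser, this.pipeline, or a static pipeline) whose construction is not shown. A minimal sketch of how such a field is typically initialized for lemmatization follows; the class name Lemmatizer is illustrative, while the annotator list reflects the standard CoreNLP requirement that lemma runs after tokenize, ssplit, and pos:

import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class Lemmatizer {
    private final StanfordCoreNLP parser;

    public Lemmatizer() {
        Properties props = new Properties();
        // lemma depends on tokenize, ssplit, and pos running first
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
        this.parser = new StanfordCoreNLP(props);
    }
}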
Example 2: tagAndTokenize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
    List<String> tags = new ArrayList<String>();
    List<String> tokens = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve the POS tag and the surface form of each token
            tags.add(token.get(PartOfSpeechAnnotation.class));
            tokens.add(token.word());
        }
    }
    return new Pair<List<String>, List<String>>(tags, tokens);
}
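A hypothetical caller (tagger stands in for an instance whose parser field is already initialized, as sketched under example 1), pairing each POS tag with its token. Pair here is edu.stanford.nlp.util.Pair, whose elements are exposed as the public fields first and second:

Pair<List<String>, List<String>> tagged = tagger.tagAndTokenize("Dogs bark loudly.");
List<String> tags = tagged.first;
List<String> tokens = tagged.second;
for (int i = 0; i < tokens.size(); i++) {
    System.out.println(tokens.get(i) + "/" + tags.get(i)); // e.g. Dogs/NNS
}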
Example 3: tag
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<String> tag(String documentText)
{
    List<String> tags = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the POS tag of each token into the list of tags
            tags.add(token.get(PartOfSpeechAnnotation.class));
        }
    }
    return tags;
}
Example 4: traffer
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static String traffer(String word) {
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(word);
    // run all Annotators on this text
    stanfordCoreNLP.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    if (lemmas.size() != 1) {
        System.out.println("bug!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
    }
    return lemmas.get(0);
}
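Note that traffer assumes the pipeline returns exactly one token: if the input produces none, lemmas.get(0) throws IndexOutOfBoundsException. A defensive variant (a sketch, not the original author's code) could fall back to the input word:

// sketch: return the input unchanged when no lemma was produced
if (lemmas.isEmpty()) {
    return word;
}
return lemmas.get(0);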
Example 5: ExtractPosTagsFile
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsFile(File filePath) throws Exception {
    List<String> lstData = ExtractData(filePath);
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : lstData) {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> senten = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : senten) {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String PosTagg = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), PosTagg));
            });
        }
    }
    return lstTaggedSentences;
}
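Examples 5 through 7 build a fresh StanfordCoreNLP inside every call, which reloads the POS model each time. A common alternative, sketched here rather than taken from the original project, is to construct the pipeline once and share it:

public class PosTaggerHolder {
    // model loading is expensive; build the pipeline once and reuse it
    static final StanfordCoreNLP PIPELINE;

    static {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos");
        PIPELINE = new StanfordCoreNLP(props);
    }
}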
Example 6: ExtractPosTags
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTags(List<String> inputData)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : inputData)
    {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> senten = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : senten)
        {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String getPosTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), getPosTag));
            });
        }
    }
    return lstTaggedSentences;
}
Example 7: ExtractPosTagsSentence
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsSentence(String sentence)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation = new Annotation(sentence);
    pipeline.annotate(annotation);
    List<CoreMap> senten = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap map : senten)
    {
        map.get(TokensAnnotation.class).stream().forEach((tok) -> {
            String getPosTag = tok.get(PartOfSpeechAnnotation.class);
            lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), getPosTag));
        });
    }
    return lstTaggedSentences;
}
Example 8: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
/**
 * Takes a string and returns a list of lemmas.
 * @param documentText the raw text to annotate
 * @return one lemma per token, across all sentences
 */
public List<String> lemmatize(String documentText)
{
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 9: parsingTest
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
private static void parsingTest()
{
    // String exampleText = "The software developer who inserted a major security flaw into OpenSSL 1.2.4.8, using the file foo/bar/blah.php has said the error was \"quite trivial\" despite the severity of its impact, according to a new report. The Sydney Morning Herald published an interview today with Robin Seggelmann, who added the flawed code to OpenSSL, the world's most popular library for implementing HTTPS encryption in websites, e-mail servers, and applications. The flaw can expose user passwords and potentially the private key used in a website's cryptographic certificate (whether private keys are at risk is still being determined). This is a new paragraph about Apache Tomcat's latest update 7.0.1.";
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    // String exampleText = "Oracle DBRM has vulnerability in ABCD plug-in via abcd.1234 (found on abcd.com).";
    EntityLabeler labeler = new EntityLabeler();
    Annotation doc = labeler.getAnnotatedDoc("My Doc", exampleText);
    List<CoreMap> sentences = doc.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences)
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentence.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentence.get(TreeAnnotation.class));
    }
}
Example 10: getExampleTextFromSerGz
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
private static String getExampleTextFromSerGz(File f)
{
    String result = "";
    Annotation deserDoc = EntityLabeler.deserializeAnnotatedDoc(f.getAbsolutePath());
    List<CoreMap> sentences = deserDoc.get(SentencesAnnotation.class);
    for (int sentencenum = 0; sentencenum < sentences.size(); sentencenum++)
    {
        CoreMap sentence = sentences.get(sentencenum);
        List<CoreLabel> labels = sentence.get(TokensAnnotation.class);
        for (int i = 0; i < labels.size(); i++)
        {
            CoreLabel token = labels.get(i);
            String tokenstring = token.get(TextAnnotation.class);
            result += " " + tokenstring;
        }
        result = result.trim() + "\n";
    }
    return result;
}
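Because example 10 concatenates strings inside a nested loop, reconstruction cost grows quadratically with document length. A sketch of the same reconstruction using StringBuilder, which copies linearly (it also drops the stray leading space that the original's trim() leaves on sentences after the first):

StringBuilder result = new StringBuilder();
for (CoreMap sentence : deserDoc.get(SentencesAnnotation.class)) {
    StringBuilder line = new StringBuilder();
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        if (line.length() > 0) {
            line.append(' ');
        }
        line.append(token.get(TextAnnotation.class));
    }
    result.append(line).append('\n');
}
return result.toString();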
Example 11: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static List<String> lemmatize(String documentText) {
    List<String> lemmas = new LinkedList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 12: lemmatize
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public List<String> lemmatize(String documentText) {
    List<String> lemmas = new LinkedList<String>();
    // Create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.pipeline.annotate(document);
    // Iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // Iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // Retrieve and add the lemma for each word into the
            // list of lemmas
            lemmas.add(token.get(LemmaAnnotation.class));
        }
    }
    return lemmas;
}
Example 13: main
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void main(String[] args) {
    // String parse = "\nasfd\n\ndaf";
    // String[] lines = parse.split("\n");
    // System.out.println(lines.length);
    // System.exit(0);
    X.prop.put("tokenized", "true");
    X.prop.put("singleSentences", "true");
    initPipeline(true, true);
    Annotation ann = new Annotation(
            "BOSTON 69 65 .515 5 1/2\n1. Michelle Freeman ( Jamaica ) 12.71 seconds");
    pipeline.annotate(ann);
    for (CoreMap sent : ann.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sent.get(TokensAnnotation.class)) {
            logger.info(token.get(TextAnnotation.class));
        }
        logger.info("sentence done");
    }
}
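The tokenized and singleSentences flags here belong to the example's own X.prop wrapper, not to CoreNLP itself. In a plain CoreNLP setup, the equivalent behavior (treat input as pre-tokenized, one sentence per line) is usually obtained with the standard tokenize.whitespace and ssplit.eolonly properties; a sketch:

Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit");
// split tokens on whitespace only (input is already tokenized)
props.setProperty("tokenize.whitespace", "true");
// treat each newline-terminated line as its own sentence
props.setProperty("ssplit.eolonly", "true");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);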
Example 14: parse
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void parse(FigerSystem sys, int lineId, String text) {
    Annotation annotation = new Annotation(text);
    Preprocessing.pipeline.annotate(annotation);
    // for each sentence
    int sentId = 0;
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        // System.out.println("[l" + i + "][s"
        // + sentId + "]tokenized sentence="
        // + StringUtils.joinWithOriginalWhiteSpace(sentence
        // .get(TokensAnnotation.class)));
        List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
        for (Pair<Integer, Integer> offset : entityMentionOffsets) {
            String label = sys.predict(annotation, sentId,
                    offset.first, offset.second);
            String mention = StringUtils.joinWithOriginalWhiteSpace(sentence.get(
                    TokensAnnotation.class).subList(offset.first, offset.second));
            System.out.println("[l" + lineId + "][s" + sentId + "]mention("
                    + offset.first + "," + offset.second + ") = " + mention
                    + ", pred = " + label);
        }
        sentId++;
    }
}
Example 15: parse
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; // import the required package/class
public static void parse(ParseStanfordFigerReverb sys, int lineId, String text) {
    Annotation annotation = new Annotation(text);
    Preprocessing.pipeline.annotate(annotation);
    // for each sentence
    int sentId = 0;
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        // System.out.println("[l" + i + "][s"
        // + sentId + "]tokenized sentence="
        // + StringUtils.joinWithOriginalWhiteSpace(sentence
        // .get(TokensAnnotation.class)));
        List<Pair<Integer, Integer>> entityMentionOffsets = getNamedEntityMentions(sentence);
        for (Pair<Integer, Integer> offset : entityMentionOffsets) {
            String label = sys.predict(annotation, sentId, offset.first, offset.second);
            String mention = StringUtils.joinWithOriginalWhiteSpace(
                    sentence.get(TokensAnnotation.class).subList(offset.first, offset.second));
            System.out.println("[l" + lineId + "][s" + sentId + "]mention("
                    + offset.first + "," + offset.second + ") = " + mention
                    + ", pred = " + label);
        }
        sentId++;
    }
}