本文整理汇总了Java中edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation类的典型用法代码示例。如果您正苦于以下问题:Java TextAnnotation类的具体用法?Java TextAnnotation怎么用?Java TextAnnotation使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
TextAnnotation类属于edu.stanford.nlp.ling.CoreAnnotations包,在下文中一共展示了TextAnnotation类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
public static void main(String[] args) {
    // Load the custom properties file configuring the Chinese models.
    StanfordCoreNLP pipeline = new StanfordCoreNLP("CoreNLP-chinese.properties");
    // Run the full pipeline over a short Chinese sentence.
    Annotation annotation = pipeline.process("我爱北京天安门");
    // Fetch the sentence list and inspect only the first sentence.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap firstSentence = sentences.get(0);
    // Print each token's surface form, one per line.
    List<CoreLabel> tokens = firstSentence.get(CoreAnnotations.TokensAnnotation.class);
    System.out.println("字/词");
    System.out.println("-----------------------------");
    for (CoreLabel token : tokens) {
        String word = token.getString(TextAnnotation.class);
        // String pos = token.getString(PartOfSpeechAnnotation.class);
        // String ner = token.getString(NamedEntityTagAnnotation.class);
        System.out.println(word);
    }
}
示例2: TokenizedCoreLabelWrapper
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Wraps a Stanford {@link CoreLabel}, copying its commonly used token-level
 * annotations into this wrapper's fields for easier downstream access.
 *
 * @param cl the CoreLabel produced by tokenization; annotations absent on the
 *           label end up as null (or an empty Optional for token offsets)
 */
public TokenizedCoreLabelWrapper(final CoreLabel cl) {
// Surface forms of the token.
this.value = cl.get(ValueAnnotation.class);
this.text = cl.get(TextAnnotation.class);
LOGGER.trace("Wrapping token text: {}", this.text);
this.originalText = cl.get(OriginalTextAnnotation.class);
// Text immediately before/after the token (typically whitespace).
this.before = cl.get(BeforeAnnotation.class);
this.after = cl.get(AfterAnnotation.class);
// NOTE(review): the field names suggest sentence-relative character offsets,
// but CharacterOffsetBegin/End annotations are usually document-relative —
// confirm against the producer of these CoreLabels.
this.startSentenceOffset = cl.get(CharacterOffsetBeginAnnotation.class);
this.endSentenceOffset = cl.get(CharacterOffsetEndAnnotation.class);
// Token-index offsets may be missing, hence the Optional wrapping.
this.startOffset = Optional.ofNullable(cl.get(TokenBeginAnnotation.class));
this.endOffset = Optional.ofNullable(cl.get(TokenEndAnnotation.class));
LOGGER.trace("TokenBegin: {}", this.startOffset);
LOGGER.trace("TokenEnd: {}", this.endOffset);
// Index of the token within its sentence, and the sentence's own index.
this.idx = cl.get(IndexAnnotation.class);
this.sentenceIdx = cl.get(SentenceIndexAnnotation.class);
LOGGER.trace("Got sentence idx: {}", this.sentenceIdx);
}
示例3: test
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/** Exercises the tokens/sentence analytic and logs the resulting annotation. */
@Test
public void test() throws Exception {
    ConcreteStanfordTokensSentenceAnalytic analytic = new ConcreteStanfordTokensSentenceAnalytic();
    TokenizedCommunication tokenizedComm = analytic.annotate(this.comm);
    final List<CoreMap> coreMaps = new ArrayList<>();
    // Convert every section of the communication into Stanford CoreMaps.
    tokenizedComm.getSections().forEach(section -> {
        LOGGER.debug("Annotation section: {}", section.getUuid().getUuidString());
        coreMaps.addAll(ConcreteToStanfordMapper.concreteSectionToCoreMapList(section, this.txt));
    });
    // Assemble a document-level annotation carrying the full text.
    Annotation annotation = new Annotation(coreMaps);
    annotation.set(TextAnnotation.class, this.txt);
    LOGGER.info(annotation.toShorterString(new String[0]));
    for (CoreMap coreMap : coreMaps) {
        LOGGER.info("Got CoreMap: {}", coreMap.toShorterString(new String[0]));
    }
}
示例4: parsingTest
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo: labels cyber entities in a sample sentence, then prints each token
 * with its label followed by the sentence's entity mentions and parse tree.
 */
private static void parsingTest()
{
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    EntityLabeler labeler = new EntityLabeler();
    Annotation annotatedDoc = labeler.getAnnotatedDoc("My Doc", exampleText);
    for (CoreMap sentence : annotatedDoc.get(SentencesAnnotation.class))
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            // One line per token: surface form, tab, cyber-entity label.
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentence.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentence.get(TreeAnnotation.class));
    }
}
示例5: getExampleTextFromSerGz
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Deserializes an annotated document from the given file and rebuilds its
 * text, one line per sentence with tokens separated by single spaces.
 *
 * Uses a StringBuilder instead of the original per-token String
 * concatenation (which was O(n^2) in the token count). The original's exact
 * semantics are preserved: after each sentence the accumulated buffer is
 * trimmed as a whole and a newline appended.
 *
 * @param f serialized annotated document (path passed to the deserializer)
 * @return reconstructed text, newline-terminated per sentence
 */
private static String getExampleTextFromSerGz(File f)
{
    StringBuilder result = new StringBuilder();
    Annotation deserDoc = EntityLabeler.deserializeAnnotatedDoc(f.getAbsolutePath());
    List<CoreMap> sentences = deserDoc.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences)
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            // Separator first, exactly like the original " " + token concatenation.
            result.append(' ').append(token.get(TextAnnotation.class));
        }
        // Preserve original behavior: trim the whole buffer, then add a newline.
        String trimmed = result.toString().trim();
        result.setLength(0);
        result.append(trimmed).append('\n');
    }
    return result.toString();
}
示例6: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/** Smoke test: runs the pipeline over pre-tokenized, one-sentence-per-line input. */
public static void main(String[] args) {
    // Treat the input as already tokenized, one sentence per line.
    X.prop.put("tokenized", "true");
    X.prop.put("singleSentences", "true");
    initPipeline(true, true);
    Annotation annotation = new Annotation(
            "BOSTON 69 65 .515 5 1/2\n1. Michelle Freeman ( Jamaica ) 12.71 seconds");
    pipeline.annotate(annotation);
    // Log each token of each sentence, marking sentence boundaries.
    for (CoreMap sentence : annotation.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            logger.info(token.get(TextAnnotation.class));
        }
        logger.info("sentence done");
    }
}
示例7: posTagLineToArray
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * POS-tags a sentence and returns an array of (POS-tag, word) pairs.
 *
 * Fixes the raw-type {@code new fig.basic.Pair(...)} construction with the
 * diamond operator; only the unavoidable generic-array creation at the end
 * still needs an unchecked suppression.
 *
 * @param line the input text (all sentences found in it are tagged)
 * @return pairs of (part-of-speech tag, token text), in token order
 */
@SuppressWarnings("unchecked")
public static fig.basic.Pair<String, String>[] posTagLineToArray(String line)
{
    Annotation document = new Annotation(line);
    pipeline.annotate(document);
    List<fig.basic.Pair<String, String>> out = new ArrayList<>();
    for (CoreMap sentence : document.get(SentencesAnnotation.class))
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            out.add(new fig.basic.Pair<>(token.get(PartOfSpeechAnnotation.class),
                    token.get(TextAnnotation.class)));
        }
    }
    // Generic array creation is impossible; the raw-typed seed array is required.
    return out.toArray(new fig.basic.Pair[0]);
}
示例8: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: labels cyber entities in a sample sentence and prints
 * each token with its label, then the entity mentions and parse tree.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String exampleText = "Microsoft Windows 7 before SP1 has Sun Java cross-site scripting vulnerability Java SE in file.php (refer to CVE-2014-1234).";
    Annotation annotatedDoc = new EntityLabeler().getAnnotatedDoc("My Doc", exampleText);
    List<CoreMap> sentenceMaps = annotatedDoc.get(SentencesAnnotation.class);
    for (CoreMap sentenceMap : sentenceMaps) {
        for (CoreLabel token : sentenceMap.get(TokensAnnotation.class)) {
            System.out.println(token.get(TextAnnotation.class) + "\t" + token.get(CyberAnnotation.class));
        }
        System.out.println("Entities:\n" + sentenceMap.get(CyberEntityMentionsAnnotation.class));
        System.out.println("Parse Tree:\n" + sentenceMap.get(TreeAnnotation.class));
    }
}
示例9: main
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Demo entry point: labels cyber entities in a test sentence, prints each
 * token with its label, then extracts relations into a subgraph for "CNN".
 *
 * @param args unused
 */
public static void main(String[] args) {
    String testSentence = "Microsoft Windows XP before 2.8 has cross-site scripting vulnerability in file.php (refer to CVE-2014-1234).";
    Annotation annotatedDoc = new EntityLabeler().getAnnotatedDoc("My Doc", testSentence);
    for (CoreMap sentence : annotatedDoc.get(SentencesAnnotation.class)) {
        // Print each token alongside its cyber-entity label.
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            System.out.println(token.get(TextAnnotation.class) + "\t\t" + token.get(CyberAnnotation.class));
        }
        System.out.println();
    }
    RelationExtractor extractor = new RelationExtractor("src/main/resources/patterns_relations_abbrev.json");
    System.out.println(extractor.createSubgraph(annotatedDoc, "CNN"));
}
示例10: annotate
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Adds a NormalizerAnnotation to every token of every sentence in the given
 * annotation, derived from each token's text and lemma via {@code addLemma}.
 *
 * Replaces the index-based token loop (index was unused beyond {@code get})
 * with an enhanced for loop, and braces the trailing {@code if}.
 *
 * @param annotation a document annotation that must already contain
 *                   sentences and tokens (lemmas expected per token)
 * @throws RuntimeException if the annotation has no SentencesAnnotation
 */
@Override
public void annotate(Annotation annotation) {
    if (verbose) {
        timer.start();
        System.err.print("Adding normalized token annotation...");
    }
    if (annotation.has(CoreAnnotations.SentencesAnnotation.class)) {
        List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                String text = token.get(TextAnnotation.class);
                String lemma = token.get(LemmaAnnotation.class);
                this.addLemma(NormalizerAnnotation.class, token, lemma, text);
            }
        }
    } else {
        throw new RuntimeException("unable to find words/tokens in: " + annotation);
    }
    if (verbose) {
        timer.stop("done.");
    }
}
示例11: adjustCharacterOffsets
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Recomputes CharacterOffsetBegin/End for every token by accumulating the
 * lengths of the tokens' OriginalTextAnnotation values, restarting the count
 * at zero for each sentence.
 *
 * Cleanups: removed the no-op {@code sentenceTokens.set(i, token)} (same
 * reference written back) and the misleading {@code sentencesCopy} alias —
 * the input list was never copied; it is mutated in place and returned.
 *
 * @param sentences sentences whose tokens are mutated in place
 * @param setOriginalText if true, each token's OriginalTextAnnotation is
 *        first rebuilt as its text plus a trailing space (so offsets include
 *        a one-character separator)
 * @return the same (mutated) list that was passed in
 */
public static List<CoreMap> adjustCharacterOffsets(List<CoreMap> sentences, boolean setOriginalText)
{
    for (CoreMap sentence : sentences)
    {
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        int characterCount = 0;
        for (CoreLabel token : sentenceTokens)
        {
            if (setOriginalText)
            {
                token.set(CoreAnnotations.OriginalTextAnnotation.class, token.get(CoreAnnotations.TextAnnotation.class) + " ");
            }
            // NOTE(review): when setOriginalText is false this NPEs if a token
            // lacks OriginalTextAnnotation — confirm callers guarantee it.
            int startCharacterCount = characterCount;
            int endCharacterCount = startCharacterCount + token.get(CoreAnnotations.OriginalTextAnnotation.class).length();
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, startCharacterCount);
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endCharacterCount);
            characterCount = endCharacterCount;
        }
        // Tokens were mutated in place; re-setting the list preserves the
        // original implementation's behavior in case set() has side effects.
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    }
    return sentences;
}
示例12: makeVertex
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Builds an IndexedWord vertex from a string token, optionally carrying an
 * embedded index (parsed by {@code readWordAndIndex}) and/or a "/POS-tag"
 * suffix. Tokens without an index marker get the next free index.
 */
private IndexedWord makeVertex(String word) {
Integer index; // initialized below
Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
if (wordAndIndex != null) {
// The token carried an explicit index marker; strip it from the word.
word = wordAndIndex.first();
index = wordAndIndex.second();
} else {
index = getNextFreeIndex();
}
indexesUsed.add(index);
// Note that, despite the use of indexesUsed and getNextFreeIndex(),
// nothing is actually enforcing that no indexes are used twice. This
// could occur if some words in the string representation being parsed
// come with index markers and some do not.
IndexedWord ifl = new IndexedWord(null, 0, index);
// System.err.println("SemanticGraphParsingTask>>> word = " + word);
// System.err.println("SemanticGraphParsingTask>>> index = " + index);
// System.err.println("SemanticGraphParsingTask>>> indexesUsed = " +
// indexesUsed);
// NOTE(review): split("/") breaks on every slash, so a word that itself
// contains "/" loses everything after its first slash — confirm intended.
String[] wordAndTag = word.split("/");
ifl.set(TextAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(PartOfSpeechAnnotation.class, wordAndTag[1]);
return ifl;
}
示例13: sentenceSplitter
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Converts each sentence into a single string of its token texts separated
 * by spaces.
 *
 * Uses a StringBuilder instead of the original O(n^2) per-token String
 * concatenation. Behavior is unchanged: each sentence string keeps its
 * trailing space, exactly as before.
 *
 * @param sentences sentences carrying TokensAnnotation
 * @return one space-joined string per sentence (trailing space included)
 */
public static List<String> sentenceSplitter(List<CoreMap> sentences) {
    List<String> sentenceList = new ArrayList<>();
    for (CoreMap sentence : sentences) {
        StringBuilder sentenceText = new StringBuilder();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            sentenceText.append(token.get(TextAnnotation.class)).append(' ');
        }
        sentenceList.add(sentenceText.toString());
    }
    return sentenceList;
}
示例14: ResolvePronoun
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Runs coreference resolution over the given text and maps every mention to
 * the surface text of its chain's representative mention.
 *
 * Cleanups: the representative-mention text is now built with a
 * StringBuilder (was O(n^2) String concatenation) and locals follow
 * lowerCamelCase; the public signature is unchanged.
 *
 * @param Text the document text to annotate
 * @return map from (0-based sentence index, 0-based token start index) of
 *         each mention to its chain's representative-mention text
 * @throws Exception propagated from the annotation pipeline
 */
public static Map<Pair<Integer,Integer>,String> ResolvePronoun(String Text) throws Exception {
    // Annotate the document so coreference chains are available.
    Annotation document = new Annotation(Text);
    processMain.pipeline.annotate(document);
    Map<Integer,CorefChain> graph = document.get(CorefChainAnnotation.class);
    Map<Pair<Integer,Integer>,String> answer = new HashMap<Pair<Integer,Integer>,String>();
    for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
        CorefChain chain = entry.getValue();
        // Chains with a single mention have nothing to resolve.
        if (chain.getMentionsInTextualOrder().size() <= 1) {
            continue;
        }
        CorefMention representative = chain.getRepresentativeMention();
        // Rebuild the representative mention's surface text from its tokens
        // (sentNum and token indices are 1-based in the coref API).
        List<CoreLabel> tokens = document.get(SentencesAnnotation.class)
                .get(representative.sentNum - 1).get(TokensAnnotation.class);
        StringBuilder mentionText = new StringBuilder();
        for (int i = representative.startIndex - 1; i < representative.endIndex - 1; i++) {
            mentionText.append(tokens.get(i).get(TextAnnotation.class)).append(' ');
        }
        String clust = mentionText.toString().trim();
        // Map every mention of the chain to the representative's text.
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            answer.put(new Pair<Integer,Integer>(mention.sentNum - 1, mention.startIndex - 1), clust);
        }
    }
    return answer;
}
示例15: concreteSectionToCoreMapList
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; //导入依赖的package包/类
/**
 * Converts a Concrete Section into Stanford CoreMaps, one per sentence,
 * carrying the sentence index, character offsets, raw text, tokens, and
 * running token offsets within the section.
 *
 * @param sect the Concrete section to convert
 * @param commText the full communication text; each sentence's text is the
 *                 substring covered by its TextSpan
 * @return one CoreMap per sentence, in document order
 */
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) {
    List<CoreMap> coreMaps = new ArrayList<>();
    List<Sentence> sentences = sect.getSentenceList();
    int tokenOffset = 0;
    for (int idx = 0; idx < sentences.size(); idx++) {
        Sentence sentence = sentences.get(idx);
        CoreMap stanfordSentence = new ArrayCoreMap();
        stanfordSentence.set(SentenceIndexAnnotation.class, idx);
        // Character span of this sentence within the communication text.
        final TextSpan span = sentence.getTextSpan();
        final int charStart = span.getStart();
        final int charEnd = span.getEnding();
        LOGGER.debug("Setting stanford sentence BeginChar = {}", charStart);
        stanfordSentence.set(CharacterOffsetBeginAnnotation.class, charStart);
        LOGGER.debug("Setting stanford sentence EndChar = {}", charEnd);
        stanfordSentence.set(CharacterOffsetEndAnnotation.class, charEnd);
        String sentenceText = commText.substring(charStart, charEnd);
        LOGGER.debug("Setting text: {}", sentenceText);
        stanfordSentence.set(TextAnnotation.class, sentenceText);
        // Convert the Concrete tokenization into Stanford CoreLabels.
        Tokenization tokenization = sentence.getTokenization();
        List<CoreLabel> tokens = tokenizationToCoreLabelList(tokenization, idx, charStart);
        LOGGER.debug("Setting stanford sentence token begin: {}", tokenOffset);
        stanfordSentence.set(TokenBeginAnnotation.class, tokenOffset);
        final int tokenEnd = tokenOffset + tokens.size();
        LOGGER.debug("Setting stanford sentence token end: {}", tokenEnd);
        stanfordSentence.set(TokenEndAnnotation.class, tokenEnd);
        stanfordSentence.set(TokensAnnotation.class, tokens);
        // Token offsets run continuously across sentences of the section.
        tokenOffset = tokenEnd;
        coreMaps.add(stanfordSentence);
    }
    return coreMaps;
}