This page collects typical usage examples of the Java method edu.stanford.nlp.pipeline.Annotation.set. If you have been wondering what Annotation.set does, how to use it, or where to find examples of it in practice, the curated code samples below should help. You can also read further about the enclosing class, edu.stanford.nlp.pipeline.Annotation.
Below are 7 code examples of Annotation.set, sorted by popularity by default. You can vote up the examples you like or find useful; your votes help the system recommend better Java examples.
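Before turning to the examples, here is a minimal, self-contained sketch of what Annotation.set does: it stores a typed value in the annotation under a key class, which Annotation.get later retrieves. The key classes and pipeline setup are standard CoreNLP; the sample text and date are invented for illustration.

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class AnnotationSetDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation annotation = new Annotation("The meeting is next Friday.");
        // set() stores a typed value under a key class; here, the document
        // date that annotators such as SUTime use to resolve relative dates.
        annotation.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14");
        pipeline.annotate(annotation);

        // get() retrieves the value stored under the same key class.
        System.out.println(annotation.get(CoreAnnotations.DocDateAnnotation.class));
    }
}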
Example 1: prepareSUTParser
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
/**
* Checks whether the given cell contains a temporal expression and, if so,
* increments the running count.
*
* @param cell
*            Holds the column's cell value.
* @param pipeline
*            The annotation pipeline used to detect temporal expressions.
* @param result
* Holds the intermediate result before executing this operation.
* @return Holds the intermediate result after executing this operation.
*/
private int prepareSUTParser(String cell, AnnotationPipeline pipeline,
        int result) {
    String trimmed = cell.trim();
    // Skip empty cells and common placeholder values that cannot be dates.
    if (!trimmed.isEmpty() && !trimmed.equals("-") && !trimmed.equals("--")
            && !trimmed.equals("---") && !trimmed.equals("n/a")
            && !trimmed.equals("N/A") && !trimmed.equals("(n/a)")
            && !trimmed.equals("Unknown") && !trimmed.equals("unknown")
            && !trimmed.equals("?") && !trimmed.equals("??")
            && !trimmed.equals(".") && !trimmed.equals("null")
            && !trimmed.equals("NULL") && !trimmed.equals("Null")) {
        Annotation annotation = new Annotation(cell);
        // SUTime needs a reference date to resolve relative expressions.
        annotation.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14");
        pipeline.annotate(annotation);
        List<CoreMap> timexAnnsAll = annotation
                .get(TimeAnnotations.TimexAnnotations.class);
        if (timexAnnsAll != null && !timexAnnsAll.isEmpty()) {
            result++;
        }
    }
    return result;
}
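The AnnotationPipeline handed to prepareSUTParser is not shown above. A sketch of how such a pipeline is typically assembled, following the standard SUTime demo that ships with CoreNLP (the factory class name here is hypothetical):

import java.util.Properties;

import edu.stanford.nlp.pipeline.AnnotationPipeline;
import edu.stanford.nlp.pipeline.POSTaggerAnnotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.pipeline.WordsToSentencesAnnotator;
import edu.stanford.nlp.time.TimeAnnotator;

public class SUTimePipelineFactory {
    // SUTime needs tokens, sentences, and POS tags before TimeAnnotator runs.
    static AnnotationPipeline build() {
        Properties props = new Properties();
        AnnotationPipeline pipeline = new AnnotationPipeline();
        pipeline.addAnnotator(new TokenizerAnnotator(false));
        pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        pipeline.addAnnotator(new POSTaggerAnnotator(false));
        pipeline.addAnnotator(new TimeAnnotator("sutime", props));
        return pipeline;
    }
}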
Example 2: test
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
@Test
public void test() throws Exception {
ConcreteStanfordTokensSentenceAnalytic firstAnalytic = new ConcreteStanfordTokensSentenceAnalytic();
TokenizedCommunication tc = firstAnalytic.annotate(this.comm);
List<CoreMap> allCmList = new ArrayList<>();
tc.getSections().forEach(sect -> {
LOGGER.debug("Annotation section: {}", sect.getUuid().getUuidString());
// TextSpan ts = sect.getTextSpan();
// String sectText = this.txt.substring(ts.getStart(), ts.getEnding());
allCmList.addAll(ConcreteToStanfordMapper.concreteSectionToCoreMapList(sect, this.txt));
});
Annotation at = new Annotation(allCmList);
at.set(TextAnnotation.class, this.txt);
// (StanfordCoreNLP.getExistingAnnotator("pos")).annotate(at);
// (StanfordCoreNLP.getExistingAnnotator("lemma")).annotate(at);
// (StanfordCoreNLP.getExistingAnnotator("ner")).annotate(at);
// (StanfordCoreNLP.getExistingAnnotator("parse")).annotate(at);
// (StanfordCoreNLP.getExistingAnnotator("dcoref")).annotate(at);
// this.pipeline.annotate(at);
// LOGGER.info("Coref results:");
LOGGER.info(at.toShorterString(new String[0]));
for (CoreMap cm : allCmList) {
LOGGER.info("Got CoreMap: {}", cm.toShorterString(new String[0]));
}
}
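The commented-out lines show the intended next step: once an Annotation has been assembled from pre-built CoreMaps and its raw text set, individual annotators can be pulled from an already-constructed StanfordCoreNLP pipeline and run one at a time. A sketch of that pattern (it assumes a pipeline containing these annotators was instantiated earlier in the JVM, which is what getExistingAnnotator requires):

// Assumes a StanfordCoreNLP pipeline with these annotators already exists,
// so getExistingAnnotator can look them up by name.
for (String name : new String[] { "pos", "lemma", "ner" }) {
    StanfordCoreNLP.getExistingAnnotator(name).annotate(at);
}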
Example 3: addLemmas
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
private List<CoreLabel> addLemmas(List<CoreLabel> words) {
    // Wrap the tokens in a single-sentence document so the annotator accepts it.
    Annotation sent = new Annotation("");
    sent.set(TokensAnnotation.class, words);
    List<CoreMap> sentences = new ArrayList<>();
    sentences.add(sent);
    Annotation document = new Annotation("");
    document.set(SentencesAnnotation.class, sentences);
    // The lemmatizer writes a LemmaAnnotation onto each CoreLabel in place,
    // so returning the original list is sufficient.
    lemmatizer.annotate(document);
    return words;
}
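Because the lemmas are written onto the tokens in place, callers read them straight off the returned CoreLabels. A hypothetical usage sketch, assuming lemmatizer is a MorphaAnnotator and the tokens already carry the POS tags it requires:

// Hypothetical caller; tokens must already have PartOfSpeechAnnotation set.
for (CoreLabel token : addLemmas(tokens)) {
    String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
    System.out.println(token.word() + " -> " + lemma);
}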
Example 4: parseAll
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
private KAFDocument parseAll(KAFDocument NAFdocument, Properties merge) throws Exception {
String text = NAFdocument.getRawText();
text = StringEscapeUtils.unescapeHtml(text);
Properties properties = getDefaultConfig();
properties.putAll(merge);
String maxTextLen = properties.getProperty("max_text_len");
int limit = Integer.parseInt(maxTextLen);
if (text.length() > limit) {
throw new Exception(String.format("Input too long (%d chars, limit is %d)", text.length(), limit));
}
loadModels(properties);
Properties stanfordConfig = PropertiesUtils.dotConvertedProperties(properties, "stanford");
// Load pipeline
Properties thisSessionProps = new Properties(stanfordConfig);
StanfordCoreNLP thisPipeline = new StanfordCoreNLP(thisSessionProps);
// Stanford
logger.info("Annotating with Stanford CoreNLP");
LinguisticProcessor linguisticProcessor = new LinguisticProcessor("text", "Stanford CoreNLP");
linguisticProcessor.setBeginTimestamp();
Annotation document = new Annotation(text);
document.set(CoreAnnotations.DocDateAnnotation.class, NAFdocument.getFileDesc().creationtime);
thisPipeline.annotate(document);
logger.info(thisPipeline.timingInformation());
linguisticProcessor.setEndTimestamp();
NAFdocument.addLinguisticProcessor(linguisticProcessor.getLayer(), linguisticProcessor);
annotateStanford(properties, document, NAFdocument);
logger.info("Parsing finished");
return NAFdocument;
}
Example 5: annotate
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
/**
 * Computes readability statistics for the given Annotation and stores the
 * result under ReadabilityAnnotations.ReadabilityAnnotation.
 *
 * @param annotation the Annotation to process
 */
@Override public void annotate(Annotation annotation) {
Readability readability = null;
if (className != null) {
try {
Class<? extends Readability> obj = (Class<? extends Readability>) Class.forName(className);
Constructor<? extends Readability> constructor = obj.getConstructor(Properties.class, Properties.class, Annotation.class);
readability = constructor.newInstance(globalProperties, localProperties, annotation);
} catch (Exception e) {
LOGGER.error(e.getMessage());
}
}
if (readability == null) {
if (language == null) {
LOGGER.warn("Language variable is not defined, readability will be empty");
return;
}
switch (language) {
case "it":
readability = new ItalianStandardReadability(globalProperties, localProperties, annotation);
break;
case "es":
readability = new SpanishStandardReadability(globalProperties, localProperties, annotation);
break;
case "en":
readability = new EnglishStandardReadability(globalProperties, localProperties, annotation);
break;
case "gl":
readability = new GalicianStandardReadability(globalProperties, localProperties, annotation);
break;
// default:
// readability = new EnglishReadability();
}
}
if (readability == null) {
return;
}
List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
int tokenCount = 0;
readability.setSentenceCount(sentences.size());
for (CoreMap sentence : sentences) {
int sentenceID = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
int wordsNow = readability.getWordCount();
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
readability.addWord(token);
tokenCount++;
}
int words = readability.getWordCount() - wordsNow;
if (words > maxSentenceLength) {
readability.addTooLongSentence(sentenceID);
}
}
readability.setTokenCount(tokenCount);
readability.finalizeReadability();
annotation.set(ReadabilityAnnotations.ReadabilityAnnotation.class, readability);
}
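Downstream code reads the result back with the same key class used in the final set call. A minimal sketch, assuming this annotator has already run as part of a pipeline:

// Retrieve the value stored by annotate() above.
Readability readability =
        annotation.get(ReadabilityAnnotations.ReadabilityAnnotation.class);
if (readability != null) {
    System.out.println(readability.getWordCount());
}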
Example 6: annotate
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
@Override
public TokenizedCommunication annotate(TokenizedCommunication arg0) throws AnalyticException {
final Communication root = new Communication(arg0.getRoot());
if (!root.isSetText())
throw new AnalyticException("communication.text must be set to run this analytic.");
AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(root);
AnalyticUUIDGenerator g = f.create();
final List<Section> sectList = root.getSectionList();
final String commText = root.getText();
List<CoreMap> allCoreMaps = new ArrayList<>();
// String noMarkup = MarkupRewriter.removeMarkup(commText);
String noMarkup = commText;
sectList.forEach(sect -> {
List<CoreMap> cmList = ConcreteToStanfordMapper.concreteSectionToCoreMapList(sect, commText);
allCoreMaps.addAll(cmList);
});
allCoreMaps.forEach(cm -> LOGGER.trace("Got CoreMap pre-coref: {}", cm.toShorterString(new String[0])));
Annotation anno = new Annotation(allCoreMaps);
anno.set(TextAnnotation.class, noMarkup);
// TODO: it's possible that fixNullDependencyGraphs needs to be called
// before the dcoref annotator is called. To be investigated further.
for (String annotator : this.lang.getPostTokenizationAnnotators()) {
LOGGER.debug("Running annotator: {}", annotator);
(StanfordCoreNLP.getExistingAnnotator(annotator)).annotate(anno);
}
anno.get(SentencesAnnotation.class).forEach(cm -> LOGGER.trace("Got CoreMaps post-coref: {}", cm.toShorterString(new String[0])));
// TODO: not sure if this is necessary - found it in the old code.
anno.get(SentencesAnnotation.class).stream().filter(cm -> cm.containsKey(TreeAnnotation.class)).forEach(cm -> {
Tree tree = cm.get(TreeAnnotation.class);
List<Tree> treeList = new ArrayList<>();
treeList.add(tree);
ParserAnnotatorUtils.fillInParseAnnotations(false, true, this.lang.getGrammaticalFactory(), cm, treeList.get(0), GrammaticalStructure.Extras.NONE);
});
anno.get(SentencesAnnotation.class).forEach(cm -> LOGGER.trace("Got CoreMap post-fill-in: {}", cm.toShorterString(new String[0])));
List<Sentence> postSentences = annotationToSentenceList(anno, hf, arg0.getSentences(), g);
postSentences.forEach(st -> LOGGER.trace("Got pre-coref sentence: {}", st.toString()));
Map<TextSpan, Sentence> tsToSentenceMap = new HashMap<>();
postSentences.forEach(st -> tsToSentenceMap.put(st.getTextSpan(), st));
tsToSentenceMap.keySet().forEach(k -> LOGGER.trace("Got TextSpan key: {}", k.toString()));
sectList.forEach(sect -> {
List<Sentence> sentList = sect.getSentenceList();
sentList.forEach(st -> {
TextSpan ts = st.getTextSpan();
LOGGER.debug("Trying to find span: {}", ts.toString());
if (tsToSentenceMap.containsKey(ts)) {
Sentence newSent = tsToSentenceMap.get(ts);
st.setTokenization(newSent.getTokenization());
} else {
throw new RuntimeException("Didn't find sentence in the new sentences. Old sentence UUID: " + st.getUuid().getUuidString());
}
});
});
try {
// Coref.
CorefManager coref = new CorefManager(new CachedTokenizationCommunication(root), anno);
TokenizedCommunication tcWithCoref = coref.addCoreference();
return tcWithCoref;
} catch (MiscommunicationException e) {
throw new AnalyticException(e);
}
}
Example 7: processParses
import edu.stanford.nlp.pipeline.Annotation; // import the package/class this method depends on
/**
* Starts from parsed trees and runs coreference resolution over them.
*/
public List<EntityMention> processParses(Collection<Tree> trees) {
CoreLabelTokenFactory tokenfactory = new CoreLabelTokenFactory();
List<EntityMention> entities = null;
// Create an empty Annotation
Annotation document = new Annotation("");
try {
// Setup the sentences using CoreMaps and CoreLabels.
List<CoreMap> sentences = new ArrayList<CoreMap>();
for( Tree tree : trees ) {
List<CoreLabel> sentence = new ArrayList<CoreLabel>();
// Initial capacity hint only; ArrayCoreMap grows as keys are added.
CoreMap sent = new ArrayCoreMap(1);
sent.set(TokensAnnotation.class, sentence);
sentences.add(sent);
// Now add the leaves from the trees as separate tokens.
List<String> strs = TreeOperator.stringLeavesFromTree(tree);
List<String> pos = TreeOperator.posTagsFromTree(tree);
int start = 0, index = 0;
for( String str : strs ) {
// makeToken expects (text, begin offset, length), not an end offset.
CoreLabel label = tokenfactory.makeToken(str, start, str.length());
start += str.length() + 1;
label.set(PartOfSpeechAnnotation.class, pos.get(index++));
sentence.add(label);
}
// Now add the parse tree.
sent.set(TreeAnnotation.class, tree);
}
// Add all sentences as an annotation to the document.
document.set(CoreAnnotations.SentencesAnnotation.class, sentences);
// for( CoreMap sen : sentences ) {
// System.out.println(sen);
// }
// NOTE: You can see each annotator get created in the StanfordCoreNLP.java class.
// Look at its function getDefaultAnnotatorPool()
pipeline.annotate(document);
// System.out.println("AFTER");
// for( CoreMap sen : sentences )
// System.out.println(sen);
// This is the coreference link graph
// Each chain stores a set of mentions that link to each other,
// along with a method for getting the most representative mention
// Both sentence and token offsets start at 1!
Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
// for( Integer id : graph.keySet() ) System.out.println(id + "\t" + graph.get(id));
entities = extractEntities(graph);
} catch( Exception ex ) {
System.out.println("--STANFORD COREF EXCEPTION-- Parses skipped...");
ex.printStackTrace();
}
return entities;
}
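Since processParses supplies tokens, POS tags, and parse trees itself, the pipeline field only needs the remaining annotators that dcoref depends on, with requirement checking turned off so CoreNLP does not insist on a tokenizer or parser. A sketch of how such a pipeline is commonly configured (an assumption; the original field initialization is not shown):

Properties props = new Properties();
// Tokens, POS tags, and parses are set manually, so only run the rest.
props.setProperty("annotators", "lemma, ner, dcoref");
// The second argument disables requirement enforcement, since "tokenize"
// and "parse" are deliberately absent from the annotators list.
StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false);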