This article collects typical usage examples of the Java class edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation. If you are wondering what PartOfSpeechAnnotation is for, how to use it, or where to find examples of it in use, the curated class code examples below may help.
The PartOfSpeechAnnotation class belongs to the edu.stanford.nlp.ling.CoreAnnotations package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
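Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: build a StanfordCoreNLP pipeline with the tokenize, ssplit, and pos annotators, annotate a document, and read the PartOfSpeechAnnotation off each token. The class name PosTagDemo and the input sentence are illustrative only.
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class PosTagDemo {
    public static void main(String[] args) {
        // the pos annotator requires tokenize and ssplit to run first
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        // annotate a document and print each token with its Penn Treebank tag
        Annotation document = new Annotation("Stanford CoreNLP tags parts of speech.");
        pipeline.annotate(document);
        for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                System.out.println(token.word() + "/" + token.get(PartOfSpeechAnnotation.class));
            }
        }
    }
}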
Example 1: tagAndTokenize
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
    List<String> tags = new ArrayList<String>();
    List<String> tokens = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // retrieve the POS tag and the word form of each token
            tags.add(token.get(PartOfSpeechAnnotation.class));
            tokens.add(token.word());
        }
    }
    return new Pair<List<String>, List<String>>(tags, tokens);
}
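A possible call site for the method above; the snippet assumes the enclosing class initializes its parser field as a StanfordCoreNLP pipeline containing at least the tokenize, ssplit, and pos annotators (how the field is built is not shown in the original and is assumed here):
// assumption: this.parser was built elsewhere, e.g. as
// new StanfordCoreNLP(props) with annotators "tokenize,ssplit,pos"
Pair<List<String>, List<String>> tagsAndTokens = tagAndTokenize("The quick brown fox jumps.");
List<String> tags = tagsAndTokens.first();    // e.g. [DT, JJ, JJ, NN, VBZ, .]
List<String> tokens = tagsAndTokens.second(); // e.g. [The, quick, brown, fox, jumps, .]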
Example 2: tag
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public List<String> tag(String documentText)
{
    List<String> tags = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // iterate over all tokens in a sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // retrieve the POS tag of each token
            tags.add(token.get(PartOfSpeechAnnotation.class));
        }
    }
    return tags;
}
Example 3: ExtractPosTagsFile
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsFile(File filePath) throws Exception {
    List<String> lstData = ExtractData(filePath);
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : lstData)
    {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : sentences)
        {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            });
        }
    }
    return lstTaggedSentences;
}
Example 4: ExtractPosTags
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTags(List<String> inputData)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : inputData)
    {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap map : sentences)
        {
            map.get(TokensAnnotation.class).stream().forEach((tok) -> {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            });
        }
    }
    return lstTaggedSentences;
}
Example 5: ExtractPosTagsSentence
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
@Override
public List<ExtractPosTag> ExtractPosTagsSentence(String sentence)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation = new Annotation(sentence);
    pipeline.annotate(annotation);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (CoreMap map : sentences)
    {
        map.get(TokensAnnotation.class).stream().forEach((tok) -> {
            String posTag = tok.get(PartOfSpeechAnnotation.class);
            lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
        });
    }
    return lstTaggedSentences;
}
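Examples 3 to 5 differ only in their input source. Note that all three construct a new StanfordCoreNLP pipeline on every call; pipeline construction loads the tagger model and is expensive, so in practice the pipeline is usually built once and reused across calls.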
Example 6: requires
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
/**
 * Uses the same requirements as the CoreNLP NERCombinerAnnotator.
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
    return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            CoreAnnotations.TextAnnotation.class,
            CoreAnnotations.TokensAnnotation.class,
            CoreAnnotations.SentencesAnnotation.class,
            CoreAnnotations.CharacterOffsetBeginAnnotation.class,
            CoreAnnotations.CharacterOffsetEndAnnotation.class,
            CoreAnnotations.PartOfSpeechAnnotation.class,
            CoreAnnotations.LemmaAnnotation.class,
            CoreAnnotations.BeforeAnnotation.class,
            CoreAnnotations.AfterAnnotation.class,
            CoreAnnotations.TokenBeginAnnotation.class,
            CoreAnnotations.TokenEndAnnotation.class,
            CoreAnnotations.IndexAnnotation.class,
            CoreAnnotations.OriginalTextAnnotation.class,
            CoreAnnotations.SentenceIndexAnnotation.class
    )));
}
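Declaring PartOfSpeechAnnotation (along with the token, sentence, offset, and lemma annotations) as a requirement tells the CoreNLP pipeline that this custom annotator must be scheduled after the tokenizer, sentence splitter, POS tagger, and lemmatizer have run.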
Example 7: getWordnetPOS
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public static POS getWordnetPOS(CoreLabel word) {
    String stanfordPOS = word.getString(PartOfSpeechAnnotation.class);
    if (stanfordPOS.startsWith("N"))
        return POS.NOUN;
    if (stanfordPOS.startsWith("J"))
        return POS.ADJECTIVE;
    if (stanfordPOS.startsWith("V"))
        return POS.VERB;
    if (stanfordPOS.startsWith("R"))
        return POS.ADVERB;
    return null;
}
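The prefix checks rely on the Penn Treebank tagset stored by PartOfSpeechAnnotation: noun tags (NN, NNS, NNP, NNPS) start with N, adjective tags (JJ, JJR, JJS) with J, verb tags (VB, VBD, VBG, VBN, VBP, VBZ) with V, and adverb tags (RB, RBR, RBS) with R. Any other tag (determiners, prepositions, punctuation, and so on) falls through and the method returns null, so callers must handle that case.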
Example 8: tokenizeText
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
@Override
protected TermTokenizedText tokenizeText(String text, PosTaggingTermFilter filter) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);
    List<CoreLabel> tokens = document.get(TokensAnnotation.class);
    TermTokenizedText ttText = new TermTokenizedText();
    List<Term> terms = ttText.getTermTokenizedText();
    Term term;
    int lastEnd = -1;
    for (CoreLabel token : tokens) {
        // Make sure that periods are not added twice, which can happen
        // when a period is interpreted both as the end of a sentence and
        // as the punctuation of an abbreviation.
        if (!((token.beginPosition() <= lastEnd) && ".".equals(token.get(PartOfSpeechAnnotation.class)))) {
            term = transformToTerm(token);
            if ((filter == null) || (filter.isTermGood(term))) {
                terms.add(term);
            }
            lastEnd = token.endPosition();
        }
    }
    return ttText;
}
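beginPosition() and endPosition() return character offsets into the original text, so the guard skips any token that starts at or before the previous token's end and carries the period POS tag ".", which is exactly the duplicate reading of an abbreviation's final dot.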
Example 9: posTagLineToArray
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
/**
 * POS-tag a sentence and return an array of Pairs that contain the POS tag and word.
 * @param line the input sentence
 * @return an array of (POS tag, word) pairs
 */
public static fig.basic.Pair<String, String>[] posTagLineToArray(String line)
{
    Annotation document = new Annotation(line);
    pipeline.annotate(document);
    List<fig.basic.Pair<String, String>> out = new ArrayList<>();
    for (CoreMap sentence : document.get(SentencesAnnotation.class))
    {
        List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
        for (CoreLabel token : tokens)
        {
            out.add(new fig.basic.Pair<>(token.get(PartOfSpeechAnnotation.class), token.get(TextAnnotation.class)));
        }
    }
    return out.toArray(new fig.basic.Pair[0]);
}
Example 10: makeVertex
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
private IndexedWord makeVertex(String word) {
    Integer index; // initialized below
    Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
    if (wordAndIndex != null) {
        word = wordAndIndex.first();
        index = wordAndIndex.second();
    } else {
        index = getNextFreeIndex();
    }
    indexesUsed.add(index);
    // Note that, despite the use of indexesUsed and getNextFreeIndex(),
    // nothing actually enforces that no index is used twice. This could
    // occur if some words in the string representation being parsed
    // come with index markers and some do not.
    IndexedWord ifl = new IndexedWord(null, 0, index);
    // System.err.println("SemanticGraphParsingTask>>> word = " + word);
    // System.err.println("SemanticGraphParsingTask>>> index = " + index);
    // System.err.println("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
    String[] wordAndTag = word.split("/");
    ifl.set(TextAnnotation.class, wordAndTag[0]);
    if (wordAndTag.length > 1)
        ifl.set(PartOfSpeechAnnotation.class, wordAndTag[1]);
    return ifl;
}
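With this word/tag convention, an input token such as dog/NN yields TextAnnotation "dog" and PartOfSpeechAnnotation "NN", while a bare token like dog is stored without a POS tag.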
Example 11: getTaggedSentences
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public List<TaggedToken[]> getTaggedSentences(String text) {
    ArrayList<TaggedToken> result = new ArrayList<TaggedToken>();
    ArrayList<TaggedToken[]> output = new ArrayList<TaggedToken[]>();
    Annotation document1 = new Annotation(text);
    pipeline.annotate(document1);
    List<CoreMap> sentences = document1.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            result.add(new TaggedToken(token.get(PartOfSpeechAnnotation.class), token.toString()));
        }
        // toArray() with no arguments returns an Object[] that cannot be
        // cast to TaggedToken[]; pass a typed array instead
        output.add(result.toArray(new TaggedToken[0]));
        result.clear();
    }
    return output;
}
Example 12: getTaggedSentencesString
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public List<String> getTaggedSentencesString(String text) {
    ArrayList<String> result = new ArrayList<String>();
    Annotation document1 = new Annotation(text);
    pipeline.annotate(document1);
    List<CoreMap> sentences = document1.get(SentencesAnnotation.class);
    StringBuilder resultString = new StringBuilder();
    TaggedToken taggedToken;
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            taggedToken = new TaggedToken(token.get(PartOfSpeechAnnotation.class), token.toString());
            // emit each token as "[TAG]token "
            resultString.append("[").append(taggedToken.tag).append("]").append(taggedToken.token).append(" ");
        }
        result.add(resultString.toString());
        resultString.setLength(0);
    }
    return result;
}
Example 13: PreNERCoreLabelWrapper
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
/**
 * Wraps a tokenized CoreLabel, capturing its POS, NER, and lemma
 * annotations as Optionals, since any of them may be absent.
 */
public PreNERCoreLabelWrapper(final CoreLabel cl) {
    this.orig = new TokenizedCoreLabelWrapper(cl);
    this.posTag = Optional.ofNullable(cl.get(PartOfSpeechAnnotation.class));
    this.nerTag = Optional.ofNullable(cl.get(NamedEntityTagAnnotation.class));
    this.lemmaTag = Optional.ofNullable(cl.get(LemmaAnnotation.class));
}
Example 14: tagTokens
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public List<String> tagTokens(String text) {
    List<String> tagged = new ArrayList<String>();
    Annotation document = runPipeline(text);
    // these are all the sentences in this document;
    // a CoreMap is essentially a Map that uses class objects as keys
    // and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // traversing the words in the current sentence;
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // the text of the token
            String word = token.get(TextAnnotation.class);
            // the POS tag of the token
            String pos = token.get(PartOfSpeechAnnotation.class);
            // the NER label of the token
            String ne = token.get(NamedEntityTagAnnotation.class);
            // the lemma of the token
            String lemma = token.get(LemmaAnnotation.class);
            // the sentence index
            int sentId = token.get(SentenceIndexAnnotation.class);
            tagged.add(word + "/" + pos + "/" + ne + "/" + lemma + "/" + sentId);
        }
    }
    return tagged;
}
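Each returned entry packs the word, POS tag, NER label, lemma, and sentence index into one slash-delimited string; since CoreNLP labels non-entity tokens with O, a typical entry might look like dogs/NNS/O/dog/0 (illustrative).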
Example 15: buildMention
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; // import the required package/class
public Mention buildMention(Annotation annotation, int sentId,
        int startToken, int endToken) {
    CoreMap sentAnn = annotation.get(SentencesAnnotation.class).get(sentId);
    List<CoreLabel> tokens = sentAnn.get(TokensAnnotation.class);
    // create a Mention object
    Mention.Builder m = Mention.newBuilder();
    m.setStart(startToken);
    m.setEnd(endToken);
    for (int i = 0; i < tokens.size(); i++) {
        m.addTokens(tokens.get(i).get(OriginalTextAnnotation.class));
        m.addPosTags(tokens.get(i).get(PartOfSpeechAnnotation.class));
    }
    m.setEntityName("");
    m.setFileid("on-the-fly");
    m.setSentid(sentId);
    // dependency information
    String depStr = StanfordDependencyResolver.getString(sentAnn);
    if (depStr != null) {
        for (String d : depStr.split("\t")) {
            Matcher match = Preprocessing.depPattern.matcher(d);
            if (match.find()) {
                m.addDeps(Dependency.newBuilder().setType(match.group(1))
                        .setGov(Integer.parseInt(match.group(3)) - 1)
                        .setDep(Integer.parseInt(match.group(5)) - 1)
                        .build());
            }
            // strings that do not match the dependency pattern are skipped
        }
    }
    return m.build();
}