当前位置: 首页>>代码示例>>Java>>正文


Java PartOfSpeechAnnotation类代码示例

本文整理汇总了Java中edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation的典型用法代码示例。如果您正苦于以下问题:Java PartOfSpeechAnnotation类的具体用法?Java PartOfSpeechAnnotation怎么用?Java PartOfSpeechAnnotation使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


PartOfSpeechAnnotation类属于edu.stanford.nlp.ling.CoreAnnotations包,在下文中一共展示了PartOfSpeechAnnotation类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: tagAndTokenize

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Annotates the given text and collects, in two parallel lists, the POS tag
 * and the surface form of every token.
 *
 * @param documentText raw text to annotate
 * @return a pair of (POS tags, tokens), aligned by index
 */
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
	List<String> tags = new ArrayList<>();
	List<String> tokens = new ArrayList<>();

	// create an empty Annotation just with the given text
	Annotation document = new Annotation(documentText);

	// run all Annotators on this text
	this.parser.annotate(document);

	// Iterate over all of the sentences found
	List<CoreMap> sentences = document.get(SentencesAnnotation.class);
	for (CoreMap sentence : sentences) {
		// Collect the POS tag and the word for every token.
		// (The original comment claimed this gathered lemmas; it does not.)
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			tags.add(token.get(PartOfSpeechAnnotation.class));
			tokens.add(token.word());
		}
	}

	return new Pair<>(tags, tokens);
}
 
开发者ID:uwnlp,项目名称:recipe-interpretation,代码行数:26,代码来源:Lemmatizer.java

示例2: tag

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Annotates the given text and returns the POS tag of every token,
 * in document order.
 *
 * @param documentText raw text to annotate
 * @return the POS tags of all tokens
 */
public List<String> tag(String documentText)
{
	List<String> tags = new ArrayList<>();

	// create an empty Annotation just with the given text
	Annotation document = new Annotation(documentText);

	// run all Annotators on this text
	this.parser.annotate(document);

	// Iterate over all of the sentences found
	List<CoreMap> sentences = document.get(SentencesAnnotation.class);
	for (CoreMap sentence : sentences) {
		// Collect the POS tag of every token.
		// (The original comment claimed this gathered lemmas; it does not.)
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			tags.add(token.get(PartOfSpeechAnnotation.class));
		}
	}

	return tags;
}
 
开发者ID:uwnlp,项目名称:recipe-interpretation,代码行数:24,代码来源:Lemmatizer.java

示例3: ExtractPosTagsFile

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Reads lines from the given file and returns one (token, POS tag) entry
 * per token across all lines.
 *
 * @param filePath file whose lines are extracted via {@code ExtractData}
 * @return tagged tokens for every sentence of every line
 * @throws Exception propagated from file extraction
 */
@Override
public List<ExtractPosTag> ExtractPosTagsFile(File filePath) throws Exception {
    List<String> lstData = ExtractData(filePath);
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    // NOTE(review): constructing a StanfordCoreNLP pipeline is expensive;
    // consider caching it in a field instead of rebuilding it on every call.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String line : lstData) {
        Annotation annotation = new Annotation(line);
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            // Plain loop instead of stream().forEach mutating an outer list.
            for (CoreLabel tok : sentence.get(TokensAnnotation.class)) {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            }
        }
    }
    return lstTaggedSentences;
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:23,代码来源:ExtractPosTagData.java

示例4: ExtractPosTags

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Tags every string in the input list and returns one (token, POS tag)
 * entry per token.
 *
 * @param inputData strings to annotate
 * @return tagged tokens for every sentence of every input string
 */
@Override
public List<ExtractPosTag> ExtractPosTags(List<String> inputData)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    // NOTE(review): constructing a StanfordCoreNLP pipeline is expensive;
    // consider caching it in a field instead of rebuilding it on every call.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    for (String str : inputData) {
        Annotation annotation = new Annotation(str);
        pipeline.annotate(annotation);
        for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
            // Plain loop instead of stream().forEach mutating an outer list.
            for (CoreLabel tok : sentence.get(TokensAnnotation.class)) {
                String posTag = tok.get(PartOfSpeechAnnotation.class);
                lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
            }
        }
    }
    return lstTaggedSentences;
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:23,代码来源:ExtractPosTagData.java

示例5: ExtractPosTagsSentence

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Tags a single sentence (or short text) and returns one (token, POS tag)
 * entry per token.
 *
 * @param sentence text to annotate
 * @return tagged tokens in order of appearance
 */
@Override
public List<ExtractPosTag> ExtractPosTagsSentence(String sentence)
{
    List<ExtractPosTag> lstTaggedSentences = new ArrayList<>();
    // NOTE(review): constructing a StanfordCoreNLP pipeline is expensive;
    // consider caching it in a field instead of rebuilding it on every call.
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation annotation = new Annotation(sentence);
    pipeline.annotate(annotation);
    for (CoreMap sent : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        // Plain loop instead of stream().forEach mutating an outer list.
        for (CoreLabel tok : sent.get(TokensAnnotation.class)) {
            String posTag = tok.get(PartOfSpeechAnnotation.class);
            lstTaggedSentences.add(new ExtractPosTag(tok.originalText(), posTag));
        }
    }
    return lstTaggedSentences;
}
 
开发者ID:unsw-cse-soc,项目名称:Data-curation-API,代码行数:21,代码来源:ExtractPosTagData.java

示例6: requires

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Declares the annotations this annotator needs as input — the same
 * requirements as the CoreNLP NERCombinerAnnotator.
 *
 * <p>Javadoc moved before {@code @Override} so documentation tools attach
 * it to the method (it previously sat between the annotation and the
 * signature).</p>
 *
 * @return an unmodifiable set of required annotation keys
 */
@Override
public Set<Class<? extends CoreAnnotation>> requires() {
    return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            CoreAnnotations.TextAnnotation.class,
            CoreAnnotations.TokensAnnotation.class,
            CoreAnnotations.SentencesAnnotation.class,
            CoreAnnotations.CharacterOffsetBeginAnnotation.class,
            CoreAnnotations.CharacterOffsetEndAnnotation.class,
            CoreAnnotations.PartOfSpeechAnnotation.class,
            CoreAnnotations.LemmaAnnotation.class,
            CoreAnnotations.BeforeAnnotation.class,
            CoreAnnotations.AfterAnnotation.class,
            CoreAnnotations.TokenBeginAnnotation.class,
            CoreAnnotations.TokenEndAnnotation.class,
            CoreAnnotations.IndexAnnotation.class,
            CoreAnnotations.OriginalTextAnnotation.class,
            CoreAnnotations.SentenceIndexAnnotation.class
        )));
}
 
开发者ID:toliwa,项目名称:CoreNLP-jMWE,代码行数:23,代码来源:JMWEAnnotator.java

示例7: getWordnetPOS

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Maps the Penn Treebank POS tag on the given token to the corresponding
 * WordNet part of speech.
 *
 * @param word token carrying a {@code PartOfSpeechAnnotation}
 * @return the WordNet POS, or {@code null} when the tag is not a noun,
 *         adjective, verb, or adverb prefix
 */
public static POS getWordnetPOS(CoreLabel word){
	String tag = word.getString(PartOfSpeechAnnotation.class);

	// Penn tags group by first letter: N* nouns, J* adjectives,
	// V* verbs, R* adverbs.
	if (tag.startsWith("N")) {
		return POS.NOUN;
	} else if (tag.startsWith("J")) {
		return POS.ADJECTIVE;
	} else if (tag.startsWith("V")) {
		return POS.VERB;
	} else if (tag.startsWith("R")) {
		return POS.ADVERB;
	}
	return null;
}
 
开发者ID:everling,项目名称:PRECISE,代码行数:18,代码来源:Parser.java

示例8: tokenizeText

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Tokenizes the text with the wrapped pipeline and converts each token into
 * a {@link Term}, optionally filtered.
 *
 * @param text   text to tokenize
 * @param filter optional term filter; {@code null} accepts every term
 * @return the tokenized text with all accepted terms
 */
@Override
protected TermTokenizedText tokenizeText(String text, PosTaggingTermFilter filter) {
    Annotation document = new Annotation(text);
    pipeline.annotate(document);

    TermTokenizedText result = new TermTokenizedText();
    List<Term> terms = result.getTermTokenizedText();
    int previousEnd = -1;
    for (CoreLabel token : document.get(TokensAnnotation.class)) {
        // A '.' can be emitted twice when it serves both as the end of a
        // sentence and as abbreviation punctuation; skip the duplicate.
        boolean duplicatedDot = (token.beginPosition() <= previousEnd)
                && ".".equals(token.get(PartOfSpeechAnnotation.class));
        if (duplicatedDot) {
            continue;
        }
        Term term = transformToTerm(token);
        if ((filter == null) || filter.isTermGood(term)) {
            terms.add(term);
        }
        previousEnd = token.endPosition();
    }
    return result;
}
 
开发者ID:dice-group,项目名称:Cetus,代码行数:25,代码来源:StanfordTaggerWrapper.java

示例9: posTagLineToArray

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * POS-tag a sentence and return an array of Pairs that contain the POS-tag
 * and word, in token order.
 *
 * @param line the sentence to tag
 * @return array of (POS tag, word) pairs
 */
public static fig.basic.Pair<String, String>[] posTagLineToArray(String line)
{
    Annotation document = new Annotation(line);
    pipeline.annotate(document);
    List<fig.basic.Pair<String, String>> out = new ArrayList<>();
    for (CoreMap sentence : document.get(SentencesAnnotation.class))
    {
        for (CoreLabel token : sentence.get(TokensAnnotation.class))
        {
            // Fixed raw-type construction (was `new fig.basic.Pair(...)`,
            // an unchecked raw generic); the diamond keeps it type-safe.
            out.add(new fig.basic.Pair<>(token.get(PartOfSpeechAnnotation.class),
                    token.get(TextAnnotation.class)));
        }
    }
    return out.toArray(new fig.basic.Pair[0]);
}
 
开发者ID:sinantie,项目名称:PLTAG,代码行数:22,代码来源:PosTagger.java

示例10: makeVertex

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Parses a single "word" token (optionally carrying an index marker and a
 * "/TAG" suffix) into an IndexedWord vertex, drawing a fresh index from
 * getNextFreeIndex() when the token does not specify one.
 */
private IndexedWord makeVertex(String word) {
  Integer index; // initialized below
  // Split off an explicit index marker, if the token has one.
  Pair<String, Integer> wordAndIndex = readWordAndIndex(word);
  if (wordAndIndex != null) {
    word = wordAndIndex.first();
    index = wordAndIndex.second();
  } else {
    index = getNextFreeIndex();
  }
  indexesUsed.add(index);
  // Note that, despite the use of indexesUsed and getNextFreeIndex(),
  // nothing is actually enforcing that no indexes are used twice. This
  // could occur if some words in the string representation being parsed
  // come with index markers and some do not.
  IndexedWord ifl = new IndexedWord(null, 0, index);
  // System.err.println("SemanticGraphParsingTask>>> word = " + word);
  // System.err.println("SemanticGraphParsingTask>>> index = " + index);
  // System.err.println("SemanticGraphParsingTask>>> indexesUsed = " +
  // indexesUsed);
  // "word/TAG": text before the slash, POS tag after it (tag is optional).
  String[] wordAndTag = word.split("/");
  ifl.set(TextAnnotation.class, wordAndTag[0]);
  if (wordAndTag.length > 1)
    ifl.set(PartOfSpeechAnnotation.class, wordAndTag[1]);
  return ifl;
}
 
开发者ID:amark-india,项目名称:eventspotter,代码行数:26,代码来源:SemanticGraph.java

示例11: getTaggedSentences

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Splits the text into sentences and returns, per sentence, an array of
 * tagged tokens.
 *
 * @param text text to annotate
 * @return one {@code TaggedToken[]} per sentence, in document order
 */
public List<TaggedToken[]> getTaggedSentences(String text) {
	ArrayList<TaggedToken[]> output = new ArrayList<TaggedToken[]>();

	Annotation document1 = new Annotation(text);
	pipeline.annotate(document1);
	List<CoreMap> sentences = document1.get(SentencesAnnotation.class);
	for (CoreMap sentence : sentences) {
		// Fresh list per sentence instead of reusing one and calling
		// removeAll(result) on itself.
		ArrayList<TaggedToken> sentenceTokens = new ArrayList<TaggedToken>();
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			sentenceTokens.add(new TaggedToken(token.get(PartOfSpeechAnnotation.class), token.toString()));
		}
		// BUG FIX: the original `(TaggedToken[]) result.toArray()` casts an
		// Object[] to TaggedToken[], which throws ClassCastException at
		// runtime. The typed toArray overload allocates the right array type.
		output.add(sentenceTokens.toArray(new TaggedToken[0]));
	}

	return output;
}
 
开发者ID:NextCenturyCorporation,项目名称:EVEREST-TripletExtraction,代码行数:18,代码来源:CoreNlpPOSTagger.java

示例12: getTaggedSentencesString

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Splits the text into sentences and renders each sentence as a string of
 * "[TAG]token " fragments.
 *
 * @param text text to annotate
 * @return one rendered string per sentence, in document order
 */
public List<String> getTaggedSentencesString(String text) {
	ArrayList<String> result = new ArrayList<String>();

	Annotation document1 = new Annotation(text);
	pipeline.annotate(document1);
	List<CoreMap> sentences = document1.get(SentencesAnnotation.class);

	for (CoreMap sentence : sentences) {
		// StringBuilder avoids O(n^2) repeated string concatenation.
		StringBuilder sentenceBuilder = new StringBuilder();
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			TaggedToken taggedToken = new TaggedToken(token.get(PartOfSpeechAnnotation.class), token.toString());
			sentenceBuilder.append('[').append(taggedToken.tag).append(']')
					.append(taggedToken.token).append(' ');
		}
		result.add(sentenceBuilder.toString());
	}

	return result;
}
 
开发者ID:NextCenturyCorporation,项目名称:EVEREST-TripletExtraction,代码行数:24,代码来源:CoreNlpPOSTagger.java

示例13: PreNERCoreLabelWrapper

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Wraps an annotated CoreLabel, exposing its POS, NER, and lemma
 * annotations as Optionals (empty when the corresponding annotator did
 * not set a value on the label).
 *
 * @param cl the annotated token to wrap
 */
public PreNERCoreLabelWrapper(final CoreLabel cl) {
  this.orig = new TokenizedCoreLabelWrapper(cl);

  // Each annotation may be absent on the label, hence ofNullable.
  this.posTag = Optional.ofNullable(cl.get(PartOfSpeechAnnotation.class));
  this.nerTag = Optional.ofNullable(cl.get(NamedEntityTagAnnotation.class));
  this.lemmaTag = Optional.ofNullable(cl.get(LemmaAnnotation.class));
}
 
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:11,代码来源:PreNERCoreLabelWrapper.java

示例14: tagTokens

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Runs the pipeline on the text and renders every token as
 * "word/pos/ner/lemma/sentenceIndex".
 *
 * @param text text to annotate
 * @return one rendered string per token, in document order
 */
public List<String> tagTokens(String text) {

	List<String> tagged = new ArrayList<String>();

	Annotation document = runPipeline(text);

	// A CoreMap is essentially a Map keyed by class objects with typed
	// values; each entry of SentencesAnnotation is one sentence.
	List<CoreMap> sentences = document.get(SentencesAnnotation.class);

	for (CoreMap sentence : sentences) {
		// A CoreLabel is a CoreMap with extra token-specific accessors.
		for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
			String word = token.get(TextAnnotation.class);
			String pos = token.get(PartOfSpeechAnnotation.class);
			String ne = token.get(NamedEntityTagAnnotation.class);
			String lemma = token.get(LemmaAnnotation.class);
			int sentId = token.get(SentenceIndexAnnotation.class);

			tagged.add(word + "/" + pos + "/" + ne + "/" + lemma + "/" + sentId);
		}
	}

	return tagged;
}
 
开发者ID:sunil3590,项目名称:artificial-guy,代码行数:34,代码来源:NLP.java

示例15: buildMention

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; //导入依赖的package包/类
/**
 * Builds a Mention covering tokens [startToken, endToken) of sentence
 * {@code sentId} in the annotation, copying each token's original text and
 * POS tag and attaching the sentence's dependency edges.
 *
 * @param annotation annotated document containing the sentence
 * @param sentId     index of the sentence within the document
 * @param startToken first token of the mention span
 * @param endToken   token index one past the end of the span
 * @return the assembled Mention
 */
public Mention buildMention(Annotation annotation, int sentId,
		int startToken, int endToken) {
	CoreMap sentAnn = annotation.get(SentencesAnnotation.class).get(sentId);
	List<CoreLabel> tokens = sentAnn.get(TokensAnnotation.class);
	// create a Mention object
	Mention.Builder m = Mention.newBuilder();
	m.setStart(startToken);
	m.setEnd(endToken);
	for (CoreLabel token : tokens) {
		m.addTokens(token.get(OriginalTextAnnotation.class));
		m.addPosTags(token.get(PartOfSpeechAnnotation.class));
	}
	m.setEntityName("");
	m.setFileid("on-the-fly");
	m.setSentid(sentId);

	// dependency edges, parsed from the resolver's tab-separated output
	String depStr = StanfordDependencyResolver.getString(sentAnn);
	if (depStr != null) {
		for (String d : depStr.split("\t")) {
			Matcher match = Preprocessing.depPattern.matcher(d);
			// Removed the dead empty else-branch: entries that do not match
			// the dependency pattern are silently skipped, as before.
			if (match.find()) {
				m.addDeps(Dependency.newBuilder().setType(match.group(1))
						.setGov(Integer.parseInt(match.group(3)) - 1)
						.setDep(Integer.parseInt(match.group(5)) - 1)
						.build());
			}
		}
	}
	return m.build();
}
 
开发者ID:zhangcongle,项目名称:NewsSpikeRe,代码行数:34,代码来源:FigerSystem.java


注:本文中的edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。