This article collects typical usage examples of the Java class edu.stanford.nlp.ling.CoreLabel. If you are unsure what CoreLabel does or how to use it, the curated examples below should help.
The CoreLabel class belongs to the edu.stanford.nlp.ling package. Fifteen code examples are shown below, sorted by popularity by default.
Example 1: Token
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public Token(Sentence sentence, CoreLabel coreLabel, int tokenIndex) {
    this.sentence = sentence;
    this.coreLabel = coreLabel;
    this.index = tokenIndex;
    // surface form of the token
    this.word = coreLabel.get(CoreAnnotations.TextAnnotation.class);
    // part-of-speech tag of the token
    this.partOfSpeech = coreLabel.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    // named-entity label of the token
    this.namedEntity = coreLabel.get(CoreAnnotations.NamedEntityTagAnnotation.class);
    // lemma
    this.lemma = coreLabel.get(CoreAnnotations.LemmaAnnotation.class);
    this.characterOffsetBegin = coreLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    this.characterOffsetEnd = coreLabel.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    // resolve the edge node in the semantic graph for this label
    this.typeDependency = findTypeDependency(sentence.semanticGraph(), coreLabel);
    // stem of the word (Lovins stemmer)
    this.stem = new IteratedLovinsStemmer().stem(word);
}
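The CoreLabel instances consumed by this constructor normally come out of a StanfordCoreNLP pipeline. Below is a minimal sketch of how such labels are produced; the pipeline setup is standard CoreNLP, while the Token and Sentence classes above belong to the surrounding project and are not shown here.
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;

public class CoreLabelDemo {
    public static void main(String[] args) {
        // tokenize, ssplit, pos, lemma and ner fill the annotations read in the constructor above
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation doc = new Annotation("Stanford University is in California.");
        pipeline.annotate(doc);

        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreLabel label : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                // each CoreLabel now carries text, POS, NER, lemma and character offsets
                System.out.printf("%s\t%s\t%s\t%s%n",
                        label.word(), label.tag(), label.ner(), label.lemma());
            }
        }
    }
}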
Example 2: tokenize
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public LinkedList<String> tokenize(String text) {
    LinkedList<String> res = new LinkedList<>();
    if (text != null) {
        Annotation qaTokens = new Annotation(text);
        pipelineTokens.annotate(qaTokens);
        List<CoreMap> qssTokens = qaTokens.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentenceTokens : qssTokens) {
            ArrayList<CoreLabel> tokens = (ArrayList<CoreLabel>) sentenceTokens.get(CoreAnnotations.TokensAnnotation.class);
            for (CoreLabel t : tokens) {
                String lemma = t.lemma();
                String pos = t.tag();
                if (!stopwords.contains(lemma)) {
                    String rep = representativeProvider.getRepresentative(lemma, pos);
                    if (!stopwords.contains(rep)) {
                        res.add(rep);
                    }
                }
            }
        }
    }
    return res;
}
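Note that t.lemma() and t.tag() only return values if the pipelineTokens field was configured with at least the tokenize, ssplit, pos and lemma annotators. A plausible setup sketch follows; stopwords and representativeProvider remain project-specific and are not shown.
import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

// minimal annotator chain feeding t.lemma() and t.tag() above
Properties props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
StanfordCoreNLP pipelineTokens = new StanfordCoreNLP(props);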
Example 3: annotate
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
protected void annotate(StanfordCoreNLP pipeline, Annotation ann) {
    if (ann.get(CoreAnnotations.SentencesAnnotation.class) == null) {
        pipeline.annotate(ann);
    }
    else if (ann.get(CoreAnnotations.SentencesAnnotation.class).size() == 1) {
        CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        // clear stale natural-logic and dependency annotations before re-annotating
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            token.remove(NaturalLogicAnnotations.OperatorAnnotation.class);
            token.remove(NaturalLogicAnnotations.PolarityAnnotation.class);
        }
        sentence.remove(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        sentence.remove(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
        sentence.remove(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        sentence.remove(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
        sentence.remove(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
        pipeline.annotate(ann);
    }
}
Example 4: readResultInContents
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
private void readResultInContents(Layer targetLayer, List<CoreLabel> tokenList, AtomicInteger n) {
    int start = 0;
    int end = 0;
    String prevLabel = "O";
    for (int i = 0; i < tokenList.size(); ++i) {
        CoreLabel token = tokenList.get(i);
        String label = token.get(CoreAnnotations.AnswerAnnotation.class);
        if (!label.equals(prevLabel)) {
            createAnnotation(targetLayer, start, end, prevLabel, n);
            start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            prevLabel = label;
        }
        end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    }
    createAnnotation(targetLayer, start, end, prevLabel, n);
}
Example 5: readResultInSentence
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
private void readResultInSentence(Layer targetLayer, Layer sentence, List<CoreLabel> tokenList, AtomicInteger n) {
    int start = 0;
    int end = 0;
    String prevLabel = "O";
    for (int i = 0; i < tokenList.size(); ++i) {
        Annotation w = sentence.get(i);
        CoreLabel token = tokenList.get(i);
        String label = token.get(CoreAnnotations.AnswerAnnotation.class);
        if (!label.equals(prevLabel)) {
            createAnnotation(targetLayer, start, end, prevLabel, n);
            start = w.getStart();
            prevLabel = label;
        }
        end = w.getEnd();
    }
    createAnnotation(targetLayer, start, end, prevLabel, n);
}
Example 6: getSubgraphFromWords
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
/**
 * Given a sentence's semantic graph and a list of words, get the subgraph containing just the words in
 * 'words'. Only typed dependencies whose governor and dependent are both in the list are kept.
 * @param sg the sentence's semantic graph
 * @param words the words that the subgraph should contain
 * @return subgraph containing the words from 'words'
 * TODO: this needs to be double-checked! In some cases we get weird graphs where words are missing.
 * E.g. sentence 120 from NYT, "The International ... ". Try this for getting the subgraph when the source is
 * detected.
 */
public static SemanticGraph getSubgraphFromWords(SemanticGraph sg, ObjectArrayList<IndexedWord> words) {
    // Determine the root: the word with the smallest index
    int minInd = Integer.MAX_VALUE;
    IndexedWord root = new IndexedWord();
    for (IndexedWord w : words) {
        if (w.index() < minInd) {
            minInd = w.index();
            root = w;
        }
    }
    // Collect the typed dependencies whose governor and dependent are both in 'words'
    ObjectArrayList<TypedDependency> tds = new ObjectArrayList<TypedDependency>();
    for (TypedDependency td : sg.typedDependencies()) {
        if (words.contains(td.gov()) && words.contains(td.dep()))
            tds.add(td);
    }
    // Create the semantic graph
    TreeGraphNode rootTGN = new TreeGraphNode(new CoreLabel(root));
    EnglishGrammaticalStructure gs = new EnglishGrammaticalStructure(tds, rootTGN);
    SemanticGraph phraseSg = SemanticGraphFactory.generateUncollapsedDependencies(gs);
    return phraseSg;
}
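A possible call site for this method, assuming it lives in a utility class (called CoreNLPUtils here for illustration, as in Example 8) and that the sentence was annotated with dependency parses; the chosen word indices are purely illustrative.
// basic dependencies of an annotated sentence (a CoreMap)
SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);

// collect the words whose induced subgraph we want; token indices are 1-based
ObjectArrayList<IndexedWord> words = new ObjectArrayList<>();
words.add(sg.getNodeByIndex(1));
words.add(sg.getNodeByIndex(2));

SemanticGraph sub = CoreNLPUtils.getSubgraphFromWords(sg, words);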
Example 7: getSubgraph
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
private static SemanticGraph getSubgraph(ObjectArrayList<TypedDependency> tds, SemanticGraph sg, IndexedWord parent,
        SemanticGraphEdge e, int maxPathLength, ObjectArrayList<IndexedWord> words) {
    Set<IndexedWord> children = sg.getChildren(parent);
    for (IndexedWord child : children) {
        if (((sg.getShortestDirectedPathEdges(sg.getFirstRoot(), child)).size() <= maxPathLength) &&
                words.contains(child)) {
            e = sg.getEdge(parent, child);
            tds.add(new TypedDependency(e.getRelation(), parent, child));
            if (sg.hasChildren(child))
                getSubgraph(tds, sg, child, e, maxPathLength, words);
        } // else break;
    }
    TreeGraphNode rootTGN = new TreeGraphNode(new CoreLabel(parent));
    EnglishGrammaticalStructure gs = new EnglishGrammaticalStructure(tds, rootTGN);
    return SemanticGraphFactory.generateUncollapsedDependencies(gs);
}
Example 8: namedEntityDictionaryMinimization
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
/** Given a phrase, if it contains NERs, make a dictionary minimization around them **/
public void namedEntityDictionaryMinimization(List<CoreMap> remWords, List<CoreMap> matchWords) {
    // If (.* DT+ [RB|JJ]* NER+ .*) => drop the [RB|JJ] tokens that carry no NER tag
    this.tPattern = TokenSequencePattern.compile(REGEX.T_RB_JJ_NER);
    this.tMatcher = tPattern.getMatcher(this.phrase.getWordCoreLabelList());
    while (this.tMatcher.find()) {
        matchWords = tMatcher.groupNodes();
        for (CoreMap cm : matchWords) {
            CoreLabel cl = new CoreLabel(cm);
            if (cl.lemma() == null) cl.setLemma(cl.word());
            // If the word is an adjective or an adverb with no NER tag, mark it for removal
            if ((CoreNLPUtils.isAdj(cl.tag()) || CoreNLPUtils.isAdverb(cl.tag()))
                    && cl.ner().equals(NE_TYPE.NO_NER)) {
                remWords.add(cm);
            }
        }
        // Drop the words not found in the dictionary
        this.dropWordsNotFoundInDict(matchWords, remWords);
    }
    // Do the safe minimization
    this.namedEntitySafeMinimization(remWords, matchWords);
}
Example 9: isNonSubsectiveAdj
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
/**
 * Given an adjective (a CoreLabel object), check if it is non-subsective.
 * @param adj a word (an adjective)
 * @return true if the adjective is non-subsective, false otherwise
 */
private boolean isNonSubsectiveAdj(CoreLabel adj) {
    if (WORDS.NON_SUBSECTIVE_JJ_CF.contains(adj.lemma()))
        return true;
    else if (WORDS.NON_SUBSECTIVE_JJ_CF.contains(adj.word()))
        return true;
    else if (WORDS.NON_SUBSECTIVE_JJ_MODAL.contains(adj.lemma()))
        return true;
    else if (WORDS.NON_SUBSECTIVE_JJ_MODAL.contains(adj.word()))
        return true;
    else if (WORDS.NON_SUBSECTIVE_JJ_TEMP.contains(adj.word()))
        return true;
    else if (WORDS.NON_SUBSECTIVE_JJ_TEMP.contains(adj.lemma()))
        return true;
    return false;
}
Example 10: lemmatize
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public List<List<String>> lemmatize(String documentText)
{
    List<List<String>> lemmas = new ArrayList<List<String>>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // iterate over all tokens in the sentence
        List<String> sentence_lemmas = new ArrayList<String>();
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // retrieve the lemma of each word and add it to the sentence's list of lemmas
            sentence_lemmas.add(token.get(LemmaAnnotation.class));
        }
        lemmas.add(sentence_lemmas);
    }
    return lemmas;
}
Example 11: tagAndTokenize
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public Pair<List<String>, List<String>> tagAndTokenize(String documentText)
{
    List<String> tags = new ArrayList<String>();
    List<String> tokens = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // iterate over all tokens in the sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // retrieve the POS tag and the surface form of each token
            tags.add(token.get(PartOfSpeechAnnotation.class));
            tokens.add(token.word());
        }
    }
    return new Pair<List<String>, List<String>>(tags, tokens);
}
Example 12: tag
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public List<String> tag(String documentText)
{
    List<String> tags = new ArrayList<String>();
    // create an empty Annotation just with the given text
    Annotation document = new Annotation(documentText);
    // run all Annotators on this text
    this.parser.annotate(document);
    // iterate over all of the sentences found
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        // iterate over all tokens in the sentence
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // retrieve and add the POS tag of each token
            tags.add(token.get(PartOfSpeechAnnotation.class));
        }
    }
    return tags;
}
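A usage sketch covering Examples 10 through 12, assuming the enclosing wrapper class (named NlpWrapper here purely for illustration) initializes this.parser with the tokenize, ssplit, pos and lemma annotators; the outputs in the comments are roughly what the default English models would produce.
NlpWrapper nlp = new NlpWrapper();
List<String> tags = nlp.tag("Dogs bark loudly.");                 // e.g. [NNS, VBP, RB, .]
List<List<String>> lemmas = nlp.lemmatize("Dogs bark loudly.");   // e.g. [[dog, bark, loudly, .]]
Pair<List<String>, List<String>> tagged = nlp.tagAndTokenize("Dogs bark loudly.");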
Example 13: preprocess
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public Concept preprocess(Concept c) {
    // lazily load the POS tagger and NER models
    if (this.tagger == null)
        this.tagger = new MaxentTagger("ext_models/pos_tagger/english-left3words-distsim.tagger");
    if (this.ner == null)
        this.ner = CRFClassifier.getClassifierNoExceptions("ext_models/ner/english.all.3class.distsim.crf.ser.gz");
    // tokenize the concept name, then tag, classify and lemmatize the tokens
    List<CoreLabel> words = tokFactory.getTokenizer(new StringReader(c.name)).tokenize();
    tagger.tagCoreLabels(words);
    words = ner.classifySentence(words);
    words = this.addLemmas(words);
    List<PToken> tokens = new ArrayList<PToken>();
    for (CoreLabel word : words) {
        PToken t = new PToken(word.originalText());
        t.pos = word.tag();
        t.neTag = word.get(CoreAnnotations.AnswerAnnotation.class);
        t.lemma = word.get(LemmaAnnotation.class);
        tokens.add(t);
    }
    c.tokenList = tokens;
    return c;
}
Example 14: lemmatize
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
public PToken lemmatize(PToken t) {
    List<CoreLabel> words = tokFactory.getTokenizer(new StringReader(t.text.toLowerCase())).tokenize();
    // only single-word tokens are lemmatized
    if (words.size() > 1)
        return t;
    words.get(0).setTag(t.pos);
    // map proper-noun tags (NNP, NNPS) onto common-noun tags so the lemmatizer can handle them
    if (t.pos.startsWith("N") && t.pos.contains("P")) {
        String tag = t.pos.replace("P", "");
        if (t.text.toLowerCase().charAt(t.text.length() - 1) == 's')
            tag = "NNS";
        words.get(0).setTag(tag);
    }
    words = this.addLemmas(words);
    t.lemma = words.get(0).get(LemmaAnnotation.class);
    return t;
}
Example 15: initialize
import edu.stanford.nlp.ling.CoreLabel; // import the required package/class
/**
 * Initializes the tokenizer to detect date columns.
 */
public void initialize() {
    Properties props = new Properties();
    pipeline.addAnnotator(new TokenizerAnnotator(false) {
        @Override
        public Tokenizer<CoreLabel> getTokenizer(Reader r) {
            return new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), "");
        }
    });
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    pipeline.addAnnotator(new POSTaggerAnnotator(false));
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
}
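Once initialize() has run, the pipeline can be used to spot temporal expressions. A hedged usage sketch: SUTime needs a reference date, supplied via DocDateAnnotation, and the annotation keys below come from edu.stanford.nlp.time.
Annotation doc = new Annotation("The meeting is on March 3, 2017.");
doc.set(CoreAnnotations.DocDateAnnotation.class, "2017-01-01");
pipeline.annotate(doc);
for (CoreMap timex : doc.get(TimeAnnotations.TimexAnnotations.class)) {
    // each match exposes a normalized SUTime.Temporal value
    System.out.println(timex + " -> " + timex.get(TimeExpression.Annotation.class).getTemporal());
}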