This article collects typical usage examples of the Java method edu.stanford.nlp.ling.Sentence.toCoreLabelList. If you have been wondering what Sentence.toCoreLabelList does, how to call it, or where to find real examples of it in use, the curated code samples below should help. You can also explore the enclosing class, edu.stanford.nlp.ling.Sentence, for more context.
The following shows 8 code examples of Sentence.toCoreLabelList, drawn from real projects and sorted by popularity by default.
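Before the project examples, here is a minimal self-contained sketch of what the method does: it wraps an array (or varargs) of plain token strings into a List<CoreLabel>, the token type that most Stanford CoreNLP components consume. The class name is made up for illustration:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import java.util.List;

public class ToCoreLabelListDemo {
    public static void main(String[] args) {
        // Turn plain strings into CoreLabel tokens.
        String[] tokens = { "Stanford", "NLP", "is", "useful", "." };
        List<CoreLabel> labels = Sentence.toCoreLabelList(tokens);
        for (CoreLabel label : labels) {
            System.out.println(label.word());
        }
    }
}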
Example 1: parse_tokens
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
/**
 * @param tokens One sentence's worth of tokens at a time.
 * @return A ParseTree object holding the String representation of the tree, plus its probability.
 * @throws TApplicationException if parsing fails for any reason
 */
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException
{
    try
    {
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);
        // a single sentence's worth of tokens
        String[] tokenArray = new String[tokens.size()];
        tokens.toArray(tokenArray);
        List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray);
        Tree parseTree = parser.apply(crazyStanfordFormat);
        return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score());
    }
    catch (Exception e)
    {
        // FIXME
        throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage());
    }
}
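The method above depends on service-level fields (parser, tlp, treePrinter) defined elsewhere in its class. As a hedged, standalone sketch of the same core steps, assuming the stock English PCFG model is on the classpath:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class ParseTokensSketch {
    public static void main(String[] args) {
        // Model path is an assumption; adjust to wherever your model lives.
        LexicalizedParser parser = LexicalizedParser.loadModel(
                "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        List<CoreLabel> labels = Sentence.toCoreLabelList("This", "is", "a", "test", ".");
        Tree tree = parser.apply(labels);
        tree.pennPrint();
        System.out.println("log score: " + tree.score());
    }
}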
Example 2: demoAPI
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
/**
 * demoAPI demonstrates other ways of calling the parser with already
 * tokenized text, or in some cases, raw text that needs to be tokenized as
 * a single sentence. Output is handled with a TreePrint object. Note that
 * the options used when creating the TreePrint determine what results are
 * printed. Once again, one can capture the output by passing a
 * PrintWriter to TreePrint.printTree.
 *
 * Difference from the standard Stanford demo: the text here is already tokenized.
 */
public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words
    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();

    // This option shows loading and using an explicit tokenizer
    String sent2 = "Hey @Apple, pretty much all your products are amazing. You blow minds every time you launch a new gizmo."
            + " that said, your hold music is crap";
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(
            new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tok = tokenizerFactory
            .getTokenizer(new StringReader(sent2));
    List<CoreLabel> rawWords2 = tok.tokenize();
    parse = lp.apply(rawWords2);

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();

    // You can also use a TreePrint object to print trees and dependencies
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
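As the Javadoc above notes, the output can be captured instead of printed to stdout by handing TreePrint a PrintWriter. A small hedged illustration (requires java.io.StringWriter and java.io.PrintWriter):

// Capture the Penn-format tree in a String rather than printing it.
StringWriter sw = new StringWriter();
new TreePrint("penn").printTree(parse, new PrintWriter(sw, true));
String pennTree = sw.toString();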
Example 3: getAnnotationFromParseTrees
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees)
{
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<String> allTokens = new ArrayList<String>();
    int tokenOffset = 0;
    for (String tree : parseTrees)
    {
        // Recover the tokens from the bracketed tree string: split on ") ",
        // then take the text after the last "(" in each piece, which is a
        // "TAG token" pair once trailing parentheses are stripped.
        List<String> tokens = new ArrayList<String>();
        String[] firstSplit = tree.split("\\) ");
        for (String f : firstSplit)
        {
            String[] secondSplit = f.split("\\(");
            String[] tagAndToken = secondSplit[secondSplit.length - 1].trim().replaceAll("\\)+$", "").split(" ");
            tokens.add(tagAndToken[1]);
        }
        allTokens.addAll(tokens);
        String[] tokensArr = new String[tokens.size()];
        tokens.toArray(tokensArr);
        List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
        String originalText = Sentence.listToString(tokens);
        CoreMap sentence = new Annotation(originalText);
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        sentence.set(CharacterOffsetEndAnnotation.class, sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
        tokenOffset += sentenceTokens.size();
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(), sentence, Tree.valueOf(tree));
        sentences.add(sentence);
    }
    Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class,
            adjustCharacterOffsets(sentences, true));
    return allSentences;
}
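To make the string surgery in the loop above concrete, here is a self-contained trace of the token-recovery logic on a sample tree (the tree string and class name are invented for illustration):

public class TreeTokenTrace {
    public static void main(String[] args) {
        String tree = "(ROOT (S (NP (DT This)) (VP (VBZ is) (NP (DT a) (NN test)))))";
        for (String f : tree.split("\\) ")) {
            String[] secondSplit = f.split("\\(");
            String[] tagAndToken = secondSplit[secondSplit.length - 1]
                    .trim().replaceAll("\\)+$", "").split(" ");
            System.out.println(tagAndToken[0] + " -> " + tagAndToken[1]);
        }
        // Prints: DT -> This, VBZ -> is, DT -> a, NN -> test
    }
}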
Example 4: getAnnotationFromTokens
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public static Annotation getAnnotationFromTokens(List<String> tokens, Annotation existingAnnotation)
{
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    Annotation allSentences;
    String[] tokensArr = new String[tokens.size()];
    tokens.toArray(tokensArr);
    List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
    String originalText = Sentence.listToString(tokens);
    CoreMap sentence = new Annotation(originalText);
    sentence.set(CharacterOffsetBeginAnnotation.class, 0);
    sentence.set(CharacterOffsetEndAnnotation.class,
            sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
    sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentenceTokens.size());
    sentences.add(sentence);
    if (existingAnnotation != null)
    {
        sentences.addAll(existingAnnotation.get(CoreAnnotations.SentencesAnnotation.class));
        allSentences = existingAnnotation.copy();
        allSentences.set(CoreAnnotations.SentencesAnnotation.class,
                adjustCharacterOffsets(sentences, true));
    }
    else
    {
        allSentences = new Annotation(Sentence.listToString(tokens));
        allSentences.set(CoreAnnotations.SentencesAnnotation.class,
                adjustCharacterOffsets(sentences, true));
    }
    return allSentences;
}
Example 5: tag_tokenized_sentence
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public List<TaggedToken> tag_tokenized_sentence(List<String> tokenizedSentence)
{
    // a single sentence's worth of tokens
    String[] tokenArray = new String[tokenizedSentence.size()];
    tokenizedSentence.toArray(tokenArray);
    List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray);
    return tagSingleSentence(crazyStanfordFormat);
}
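tagSingleSentence is internal to this project. A standalone equivalent using Stanford's MaxentTagger might look like the following sketch; the model path is an assumption and should point at whatever POS model you have:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.util.List;

public class TagTokenizedSketch {
    public static void main(String[] args) {
        // Model path is an assumption; point it at your POS tagger model.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        List<CoreLabel> labels = Sentence.toCoreLabelList("Time", "flies", "quickly", ".");
        List<TaggedWord> tagged = tagger.tagSentence(labels);
        System.out.println(tagged);  // e.g. [Time/NN, flies/VBZ, quickly/RB, ./.]
    }
}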
Example 6: demoAPI
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words
    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();

    // This option shows loading and using an explicit tokenizer
    String sent2 = "This is another sentence.";
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> rawWords2 =
            tokenizerFactory.getTokenizer(new StringReader(sent2)).tokenize();
    parse = lp.apply(rawWords2);

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();

    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
Example 7: StringToIOB
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public static List<CoreLabel> StringToIOB(String str, Character segMarker) {
    // Whitespace tokenization
    List<CoreLabel> toks = Sentence.toCoreLabelList(str.trim().split("\\s+"));
    return StringToIOB(toks, segMarker);
}
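A hedged usage sketch of the wrapper above; the sample text and segmentation marker are assumptions, and the two-argument StringToIOB overload it delegates to is expected to produce character-level IOB labels:

// Hypothetical call; '-' is assumed to be the marker flagging segment boundaries.
List<CoreLabel> iobChars = StringToIOB("al-kitab huwa jadid", '-');
System.out.println("produced " + iobChars.size() + " character-level labels");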
Example 8: getListTaggerWord
import edu.stanford.nlp.ling.Sentence; // import the package/class this method depends on
public List<TaggedWord> getListTaggerWord(String frase) {
    String[] sent = frase.split(" ");
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    return parse.taggedYield();
}
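A brief usage sketch, assuming the enclosing class holds a loaded LexicalizedParser in the lp field; taggedYield() reads the POS tags off the parse tree's preterminals:

// Hypothetical caller; prints word/tag pairs such as "The/DT cat/NN ...".
List<TaggedWord> tagged = getListTaggerWord("The cat sat on the mat .");
for (TaggedWord tw : tagged) {
    System.out.println(tw.word() + "/" + tw.tag());
}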