This article collects typical usage examples of the Java class edu.stanford.nlp.ling.Sentence. If you have been wondering what the Sentence class is for and how to use it, the curated code examples below should help.
The Sentence class belongs to the edu.stanford.nlp.ling package. Fifteen code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: buildDependeciesTrees
import edu.stanford.nlp.ling.Sentence; // import the required package/class
public ArrayList<DependencyTree> buildDependeciesTrees(ArrayList<String> texts) {
    ArrayList<DependencyTree> dtList = new ArrayList<DependencyTree>();
    int textId = 1;
    for (String text : texts) {
        System.out.println(textId);
        String[] sent = text.split(" ");
        Tree parse = this.lp.apply(Sentence.toWordList(sent));
        GrammaticalStructure gs = this.gsf.newGrammaticalStructure(parse);
        Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        DependencyTree curDT = new DependencyTree();
        for (TypedDependency td : tdl) {
            TreeGraphNode dep = td.dep();
            TreeGraphNode gov = td.gov();
            GrammaticalRelation gr = td.reln();
            String depString = gr.toString() + "^^^" + gov.toString() + "^^^" + dep.toString();
            curDT.addDependency(depString);
        }
        textId++;
        dtList.add(curDT);
    }
    return dtList;
}
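The lp and gsf fields come from the example's enclosing class, which this listing omits. A minimal wiring sketch, with an assumed model path and assumed field declarations (DependencyTree is the project's own class, not a Stanford one):
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.TreebankLanguagePack;

// Hypothetical field setup for the snippet above; not from the original project
private final LexicalizedParser lp =
        LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
private final TreebankLanguagePack tlp = new PennTreebankLanguagePack();
private final GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();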
Example 2: getPCFGScore
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* Parses a sentence and returns the PCFG score as a confidence measure.
*
* @param sentence a sentence
* @return PCFG score
*/
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence) {
    if (tlp == null || parser == null)
        throw new RuntimeException("Parser has not been initialized");
    // parse the sentence to produce PCFG score
    log.debug("Parsing sentence");
    double score;
    synchronized (parser) {
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
        List<Word> words = tokenizer.tokenize();
        log.debug("Tokenization: " + words);
        parser.parse(new Sentence(words));
        score = parser.getPCFGScore();
    }
    return score;
}
Example 3: tokenize
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* Splits the sentence into individual tokens.
*
* @param sentence Input sentence
* @return Array of tokens
*/
public static String[] tokenize(String sentence) {
    List t = MaxentTagger.tokenizeText(new StringReader(sentence));
    List<String> tokens = new ArrayList<String>();
    for (int j = 0; j < t.size(); j++) {
        Sentence s1 = (Sentence) t.get(j);
        for (int i = 0; i < s1.length(); i++) {
            HasWord w = s1.getHasWord(i);
            tokens.add(w.word());
        }
    }
    return (String[]) tokens.toArray(new String[tokens.size()]);
}
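A call site might look like the following sketch; the enclosing class name NlpUtils is hypothetical, since this listing does not name the class that holds the static method:
// Hypothetical usage of the tokenize helper above
String[] tokens = NlpUtils.tokenize("Stanford NLP splits this sentence into tokens.");
for (String token : tokens) {
    System.out.println(token);
}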
Example 4: tagPos
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* Tags the tokens with part of speech
*
* @param tokens Array of token strings
* @return Part of speech tags
*/
public static String[] tagPos(String[] tokens) {
    Sentence untagged = createSentence(tokens);
    Sentence tagged = MaxentTagger.tagSentence(untagged);
    String[] pos = new String[tagged.size()];
    for (int i = 0; i < tagged.size(); i++) {
        HasWord w = (HasWord) tagged.get(i);
        String[] s = w.toString().split("/");
        if (s.length > 1)
            pos[i] = s[s.length - 1];
        else
            pos[i] = "";
    }
    return pos;
}
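The createSentence helper is not part of this listing. A plausible implementation, assuming the old list-like Sentence API that this snippet already relies on, could be:
import edu.stanford.nlp.ling.Word;

// Assumed helper (not from the original source): wraps raw tokens in the legacy Sentence container
private static Sentence createSentence(String[] tokens) {
    Sentence sentence = new Sentence();
    for (String token : tokens) {
        sentence.add(new Word(token));
    }
    return sentence;
}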
Example 5: getPCFGScore
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* Parses a sentence and returns the PCFG score as a confidence measure.
*
* @param sentence
* a sentence
* @return PCFG score
*/
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence)
{
    if (tlp == null || parser == null)
        throw new RuntimeException("Parser has not been initialized");
    // parse the sentence to produce PCFG score
    log.debug("Parsing sentence");
    double score;
    synchronized (parser)
    {
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(
                new StringReader(sentence));
        List<Word> words = tokenizer.tokenize();
        log.debug("Tokenization: " + words);
        parser.parse(new Sentence(words));
        score = parser.getPCFGScore();
    }
    return score;
}
Example 6: myExtractor
import edu.stanford.nlp.ling.Sentence; // import the required package/class
private static Sentence<TaggedWord> myExtractor(Tree t, Sentence<TaggedWord> ty) {
    Tree[] kids = t.children();
    // this inlines the content of t.isPreTerminal()
    if (kids.length == 1 && kids[0].isLeaf()) {
        if (t.label() instanceof HasTag) {
            // System.err.println("Object is: " + ((CategoryWordTag) t.label()).toString("full"));
            ty.add(new TaggedWord(kids[0].label().value(), ((HasTag) t.label()).tag()));
        } else {
            // System.err.println("Object is: " + StringUtils.getShortClassName(t.label()) + " " + t.label());
            ty.add(new TaggedWord(kids[0].label().value(), t.label().value()));
        }
    } else {
        for (int i = 0; i < kids.length; i++) {
            myExtractor(kids[i], ty);
        }
    }
    return ty;
}
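The recursion is seeded with an empty Sentence, which in this era of the API was an ArrayList subclass. A typical call, with an illustrative variable name for the input tree, might be:
// Hypothetical call site: collect the tagged words of a parsed Tree
Sentence<TaggedWord> taggedWords = myExtractor(parseTree, new Sentence<TaggedWord>());
System.out.println(taggedWords);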
Example 7: yield
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* Gets the yield of the tree. The <code>Label</code> of all leaf nodes
* is returned
* as a list ordered by the natural left to right order of the
* leaves. Null values, if any, are inserted into the list like any
* other value.
* <p><i>Implementation notes:</i> c. 2003: This has been rewritten to thread, so only one List
* is used. 2007: This method was duplicated to start to give type safety to Sentence.
* This method will now make a Word for any Leaf which does not itself implement HasWord, and
* put the Word into the Sentence, so the Sentence elements MUST implement HasWord.
*
* @param y The list in which the yield of the tree will be placed.
* Normally, this will be empty when the routine is called, but
* if not, the new yield is added to the end of the list.
* @return a <code>List</code> of the data in the tree's leaves.
*/
@SuppressWarnings("unchecked")
public <X extends HasWord> Sentence<X> yield(Sentence<X> y) {
    if (isLeaf()) {
        Label lab = label();
        // cdm: this is new hacked in stuff in Mar 2007 so we can now have a
        // well-typed version of a Sentence, whose objects MUST implement HasWord
        if (lab instanceof HasWord) {
            y.add((X) lab);
        } else {
            y.add((X) new Word(lab));
        }
    } else {
        Tree[] kids = children();
        for (int i = 0; i < kids.length; i++) {
            kids[i].yield(y);
        }
    }
    return y;
}
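For example, to collect a tree's leaves as a typed Sentence of Word objects (a sketch; tree stands for any parsed Tree instance):
// Hypothetical usage of the typed yield above
Sentence<Word> leaves = tree.yield(new Sentence<Word>());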
Example 8: Annotation
import edu.stanford.nlp.ling.Sentence; // import the required package/class
public Annotation(List<CoreMap> sentences) {
    super();
    this.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    StringBuilder text = new StringBuilder();
    for (CoreMap sentence : sentences) {
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        tokens.addAll(sentenceTokens);
        if (sentence.containsKey(CoreAnnotations.TextAnnotation.class)) {
            text.append(sentence.get(CoreAnnotations.TextAnnotation.class));
        } else {
            // If there is no text in the sentence, fake it as best as we can
            if (text.length() > 0) {
                text.append("\n");
            }
            text.append(Sentence.listToString(sentenceTokens));
        }
    }
    this.set(CoreAnnotations.TokensAnnotation.class, tokens);
    this.set(CoreAnnotations.TextAnnotation.class, text.toString());
}
Example 9: printSentences
import edu.stanford.nlp.ling.Sentence; // import the required package/class
public void printSentences(Iterable<List<? extends HasWord>> sentences, String filename)
{
    try {
        PrintWriter pw = IOUtils.getPrintWriter(filename);
        for (List<? extends HasWord> sentence : sentences) {
            pw.print("<s> "); // Note: Use <s sentence-id > to identify sentences
            String sentString = Sentence.listToString(sentence);
            if (sentence.size() > maxSentenceLength) {
                logger.warning("Sentence length=" + sentence.size() +
                        " is longer than maximum set length " + maxSentenceLength);
                logger.warning("Long Sentence: " + sentString);
            }
            pw.print(sentString);
            pw.println(" </s>");
        }
        pw.close();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
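maxSentenceLength and logger belong to the enclosing class, which is not shown. Hypothetical declarations consistent with the calls above:
import java.util.logging.Logger;

// Assumed fields (not from the original listing); the limit value is illustrative
private static final Logger logger = Logger.getLogger("SentenceWriter");
private int maxSentenceLength = 400;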
Example 10: restoreOriginalWords
import edu.stanford.nlp.ling.Sentence; // import the required package/class
@Override
public void restoreOriginalWords(Tree tree) {
    if (originalSentence == null || tree == null) {
        return;
    }
    List<Tree> leaves = tree.getLeaves();
    if (leaves.size() != originalSentence.size()) {
        throw new IllegalStateException("originalWords and sentence of different sizes: " +
                originalSentence.size() + " vs. " + leaves.size() +
                "\n Orig: " + Sentence.listToString(originalSentence) +
                "\n Pars: " + Sentence.listToString(leaves));
    }
    Iterator<? extends Label> wordsIterator = (Iterator<? extends Label>) originalSentence.iterator();
    for (Tree leaf : leaves) {
        leaf.setLabel(wordsIterator.next());
    }
}
Example 11: doOneSentence
import edu.stanford.nlp.ling.Sentence; // import the required package/class
private Tree doOneSentence(List<ParserConstraint> constraints,
                           List<CoreLabel> words) {
    ParserQuery pq = parser.parserQuery();
    pq.setConstraints(constraints);
    pq.parse(words);
    Tree tree = null;
    try {
        tree = pq.getBestParse();
        // -10000 denotes unknown words
        tree.setScore(pq.getPCFGScore() % -10000.0);
    } catch (OutOfMemoryError e) {
        System.err.println("WARNING: Parsing of sentence ran out of memory. " +
                "Will ignore and continue: " +
                Sentence.listToString(words));
    }
    return tree;
}
Example 12: parse_tokens
import edu.stanford.nlp.ling.Sentence; // import the required package/class
/**
* @param tokens One sentence worth of tokens at a time.
* @return A ParseTree object of the String representation of the tree, plus its probability.
* @throws TApplicationException
*/
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException
{
    try
    {
        treePrinter = ParserUtil.setOptions(outputFormat, tlp);
        // a single sentence worth of tokens
        String[] tokenArray = new String[tokens.size()];
        tokens.toArray(tokenArray);
        List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(tokenArray);
        Tree parseTree = parser.apply(crazyStanfordFormat);
        return new ParseTree(ParserUtil.TreeObjectToString(parseTree, treePrinter), parseTree.score());
    }
    catch (Exception e)
    {
        // FIXME
        throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage());
    }
}
Example 13: main
import edu.stanford.nlp.ling.Sentence; // import the required package/class
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }
    pw.close();
}
Example 14: main
import edu.stanford.nlp.ling.Sentence; // import the required package/class
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader(args[1])));
    for (List<HasWord> sentence : sentences) {
        ArrayList<TaggedWord> tSentence = tagger.tagSentence(sentence);
        System.out.println(Sentence.listToString(tSentence, false));
    }
}
Example 15: countMWEStatistics
import edu.stanford.nlp.ling.Sentence; // import the required package/class
static public void countMWEStatistics(Tree t,
                                      TwoDimensionalCounter<String, String> unigramTagger,
                                      TwoDimensionalCounter<String, String> labelPreterm,
                                      TwoDimensionalCounter<String, String> pretermLabel,
                                      TwoDimensionalCounter<String, String> labelTerm,
                                      TwoDimensionalCounter<String, String> termLabel)
{
    updateTagger(unigramTagger, t);
    // Count MWE statistics
    TregexMatcher m = pMWE.matcher(t);
    while (m.findNextMatchingNode()) {
        Tree match = m.getMatch();
        String label = match.value();
        if (RESOLVE_DUMMY_TAGS && label.equals(FrenchTreeReader.MISSING_PHRASAL))
            continue;
        String preterm = Sentence.listToString(match.preTerminalYield());
        String term = Sentence.listToString(match.yield());
        labelPreterm.incrementCount(label, preterm);
        pretermLabel.incrementCount(preterm, label);
        labelTerm.incrementCount(label, term);
        termLabel.incrementCount(term, label);
    }
}
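The pMWE TregexPattern is defined elsewhere in the class. In the French-treebank tooling the multi-word-expression nonterminals share an MW prefix, so a plausible definition (an assumption, not confirmed by this listing) is:
import edu.stanford.nlp.trees.tregex.TregexPattern;

// Assumed pattern: matches MWE nonterminals such as MWN, MWV, MWADV in the French treebank
private static final TregexPattern pMWE = TregexPattern.compile("/^MW/");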