This article collects typical usage examples of the Java method edu.stanford.nlp.ling.Sentence.listToString. If you have been wondering what Sentence.listToString does, how to use it, or where to find real examples of it, the curated code samples below may help. You can also browse further usage examples of the enclosing class edu.stanford.nlp.ling.Sentence.
The following presents 11 code examples of Sentence.listToString, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
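Before the examples, here is a minimal, self-contained sketch of the method's basic contract: it joins the tokens of a list into a single space-separated string. It assumes a classic CoreNLP 3.x classpath where the static helper Sentence.toCoreLabelList (also used in Examples 8 and 9 below) is available; ListToStringDemo is just a hypothetical wrapper class.

import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;

public class ListToStringDemo {
  public static void main(String[] args) {
    // Build a token list from raw strings, then join it back into one string.
    List<CoreLabel> tokens = Sentence.toCoreLabelList("This", "is", "a", "test", ".");
    System.out.println(Sentence.listToString(tokens)); // prints: This is a test .
  }
}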
Example 1: printSentences
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

public void printSentences(Iterable<List<? extends HasWord>> sentences, String filename)
{
  try {
    PrintWriter pw = IOUtils.getPrintWriter(filename);
    for (List<? extends HasWord> sentence : sentences) {
      pw.print("<s> "); // Note: use <s sentence-id > to identify sentences
      String sentString = Sentence.listToString(sentence);
      if (sentence.size() > maxSentenceLength) {
        logger.warning("Sentence length=" + sentence.size() +
            " is longer than maximum set length " + maxSentenceLength);
        logger.warning("Long Sentence: " + sentString);
      }
      pw.print(sentString);
      pw.println(" </s>");
    }
    pw.close();
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
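This method relies on fields of its enclosing class (maxSentenceLength, logger) that are not shown. A hypothetical call site, assuming those fields exist and using "input.txt" and "corpus.snt" as placeholder paths, could collect sentences from a DocumentPreprocessor:

// Hypothetical call site for printSentences.
DocumentPreprocessor dp = new DocumentPreprocessor("input.txt");
List<List<? extends HasWord>> sentences = new ArrayList<>();
for (List<HasWord> sentence : dp) {
  sentences.add(sentence);
}
printSentences(sentences, "corpus.snt");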
Example 2: restoreOriginalWords
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

@Override
public void restoreOriginalWords(Tree tree) {
  if (originalSentence == null || tree == null) {
    return;
  }
  List<Tree> leaves = tree.getLeaves();
  if (leaves.size() != originalSentence.size()) {
    throw new IllegalStateException("originalWords and sentence of different sizes: " +
        originalSentence.size() + " vs. " + leaves.size() +
        "\n Orig: " + Sentence.listToString(originalSentence) +
        "\n Pars: " + Sentence.listToString(leaves));
  }
  // Overwrite each leaf label with the corresponding original word.
  Iterator<? extends Label> wordsIterator = (Iterator<? extends Label>) originalSentence.iterator();
  for (Tree leaf : leaves) {
    leaf.setLabel(wordsIterator.next());
  }
}
Example 3: countMWEStatistics
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

static public void countMWEStatistics(Tree t,
    TwoDimensionalCounter<String, String> unigramTagger,
    TwoDimensionalCounter<String, String> labelPreterm,
    TwoDimensionalCounter<String, String> pretermLabel,
    TwoDimensionalCounter<String, String> labelTerm,
    TwoDimensionalCounter<String, String> termLabel)
{
  updateTagger(unigramTagger, t);
  // Count MWE statistics
  TregexMatcher m = pMWE.matcher(t);
  while (m.findNextMatchingNode()) {
    Tree match = m.getMatch();
    String label = match.value();
    if (RESOLVE_DUMMY_TAGS && label.equals(FrenchTreeReader.MISSING_PHRASAL))
      continue;
    // Record co-occurrences of the MWE label with its preterminal and terminal yields.
    String preterm = Sentence.listToString(match.preTerminalYield());
    String term = Sentence.listToString(match.yield());
    labelPreterm.incrementCount(label, preterm);
    pretermLabel.incrementCount(preterm, label);
    labelTerm.incrementCount(label, term);
    termLabel.incrementCount(term, label);
  }
}
Example 4: sentenceSplitter
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

public List<String> sentenceSplitter(String input) {
  Reader reader = new StringReader(input);
  DocumentPreprocessor dp = new DocumentPreprocessor(reader);
  List<String> sentenceList = new ArrayList<String>();
  for (List<HasWord> sentence : dp) {
    // Join the tokens of each detected sentence into one string.
    String sentenceString = Sentence.listToString(sentence);
    sentenceList.add(sentenceString);
  }
  return sentenceList;
}
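A brief usage note: DocumentPreprocessor tokenizes while it splits, so the returned strings are space-joined token sequences rather than the original surface text. For example:

List<String> sents = sentenceSplitter("Hello world. How are you?");
// sents.get(0) -> "Hello world ."
// sents.get(1) -> "How are you ?"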
Example 5: countNgrams
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

static public void countNgrams(String line, Counter<String> ngramCounts, Set<String> limitSet, int order) {
  String[] toks = line.split("\\s");
  // For each start position i, count every n-gram of length 1..order.
  for (int i = 0; i < toks.length; i++) {
    for (int j = 0; j < order && j + i < toks.length; j++) {
      String[] ngramArr = Arrays.copyOfRange(toks, i, i + j + 1);
      String ngram = Sentence.listToString(Arrays.asList(ngramArr));
      if (limitSet == null || limitSet.contains(ngram)) {
        ngramCounts.incrementCount(ngram);
      }
    }
  }
}
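A quick sanity check of the counting loop. ClassicCounter (from edu.stanford.nlp.stats) is CoreNLP's standard Counter implementation, and passing null for limitSet counts everything:

Counter<String> ngramCounts = new ClassicCounter<String>();
countNgrams("the cat sat on the mat", ngramCounts, null, 2);
System.out.println(ngramCounts.getCount("the"));     // 2.0 (two unigram occurrences)
System.out.println(ngramCounts.getCount("the cat")); // 1.0 (one bigram occurrence)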
Example 6: flattenTree
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

/**
 * Returns the string associated with the input parse tree. Traces and
 * ATB-specific escape sequences (e.g., "-RRB-" for ")") are removed.
 *
 * @param t - A parse tree
 * @return The yield of the input parse tree
 */
public static String flattenTree(Tree t) {
  // Prune empty/trace nodes, then join the remaining leaves.
  t = t.prune(emptyFilter, tf);
  String flatString = Sentence.listToString(t.yield());
  return flatString;
}
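A hypothetical call, assuming the enclosing class's emptyFilter and tf (tree factory) fields are initialized and the tree contains no traces to prune:

Tree t = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBD slept))))");
System.out.println(flattenTree(t)); // The cat slept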
Example 7: taggedStringFromTree
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

/**
 * Converts a parse tree into a string of tokens. Each token is a word and
 * its POS tag separated by the delimiter specified by <code>separator</code>.
 *
 * @param t - A parse tree
 * @param removeEscaping - If true, remove LDC escape characters. Otherwise, leave them.
 * @param separator Word/tag separator
 * @return A string of tagged words
 */
public static String taggedStringFromTree(Tree t, boolean removeEscaping, String separator) {
  t = t.prune(emptyFilter, tf);
  List<CoreLabel> taggedSentence = t.taggedLabeledYield();
  for (CoreLabel token : taggedSentence) {
    // Optionally undo LDC escaping before emitting the word.
    String word = (removeEscaping) ? unEscape(token.word()) : token.word();
    token.setWord(word);
    token.setValue(word);
  }
  return Sentence.listToString(taggedSentence, false, separator);
}
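Following the javadoc above, a hypothetical call with "/" as the separator (again assuming the enclosing class's emptyFilter, tf, and unEscape members) would produce word/tag pairs:

Tree t = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBD slept))))");
System.out.println(taggedStringFromTree(t, false, "/"));
// The/DT cat/NN slept/VBD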
Example 8: getAnnotationFromParseTrees
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

public static Annotation getAnnotationFromParseTrees(List<String> parseTrees)
{
  List<CoreMap> sentences = new ArrayList<CoreMap>();
  List<String> allTokens = new ArrayList<String>();
  int tokenOffset = 0;
  for (String tree : parseTrees)
  {
    // Recover the raw tokens from the bracketed parse string.
    List<String> tokens = new ArrayList<String>();
    String[] firstSplit = tree.split("\\) ");
    for (String f : firstSplit)
    {
      String[] secondSplit = f.split("\\(");
      String[] tagAndToken = secondSplit[secondSplit.length - 1].trim().replaceAll("\\)+$", "").split(" ");
      tokens.add(tagAndToken[1]);
    }
    allTokens.addAll(tokens);
    String[] tokensArr = new String[tokens.size()];
    tokens.toArray(tokensArr);
    List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
    String originalText = Sentence.listToString(tokens);
    // Build a sentence-level CoreMap with token and offset annotations.
    CoreMap sentence = new Annotation(originalText);
    sentence.set(CharacterOffsetBeginAnnotation.class, 0);
    sentence.set(CharacterOffsetEndAnnotation.class, sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
    sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
    tokenOffset += sentenceTokens.size();
    sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
    ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(), sentence, Tree.valueOf(tree));
    sentences.add(sentence);
  }
  Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
  allSentences.set(CoreAnnotations.SentencesAnnotation.class,
      adjustCharacterOffsets(sentences, true));
  return allSentences;
}
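A hypothetical invocation with two PTB-style bracketed parses (the tree strings are illustrative placeholders):

List<String> parses = Arrays.asList(
    "(ROOT (S (NP (DT The) (NN cat)) (VP (VBD slept))))",
    "(ROOT (S (NP (PRP It)) (VP (VBD purred))))");
Annotation annotation = getAnnotationFromParseTrees(parses);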
Example 9: getAnnotationFromTokens
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

public static Annotation getAnnotationFromTokens(List<String> tokens, Annotation existingAnnotation)
{
  List<CoreMap> sentences = new ArrayList<CoreMap>();
  Annotation allSentences;
  String[] tokensArr = new String[tokens.size()];
  tokens.toArray(tokensArr);
  List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
  String originalText = Sentence.listToString(tokens);
  CoreMap sentence = new Annotation(originalText);
  sentence.set(CharacterOffsetBeginAnnotation.class, 0);
  sentence.set(CharacterOffsetEndAnnotation.class,
      sentenceTokens.get(sentenceTokens.size() - 1).get(TextAnnotation.class).length());
  sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
  sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
  sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentenceTokens.size());
  sentences.add(sentence);
  if (existingAnnotation != null)
  {
    // Prepend the new sentence to the sentences of the existing annotation.
    sentences.addAll(existingAnnotation.get(CoreAnnotations.SentencesAnnotation.class));
    allSentences = existingAnnotation.copy();
    allSentences.set(CoreAnnotations.SentencesAnnotation.class,
        adjustCharacterOffsets(sentences, true));
  }
  else
  {
    allSentences = new Annotation(Sentence.listToString(tokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class,
        adjustCharacterOffsets(sentences, true));
  }
  return allSentences;
}
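And a minimal hypothetical call that starts a fresh Annotation from raw tokens (pass null when there is no existing annotation to extend):

Annotation ann = getAnnotationFromTokens(Arrays.asList("The", "cat", "slept", "."), null);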
Example 10: decode
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

/**
 * Decode raw text input.
 *
 * @param postProcessor a trained CRF postprocessor
 * @param reader source of raw input lines
 * @param outstream sink for post-processed output
 * @param nThreads number of worker threads
 * @return decoding throughput in characters per second
 */
protected static double decode(final CRFPostprocessor postProcessor,
    BufferedReader reader, PrintWriter outstream, int nThreads) {
  long numChars = 0;
  int lineNumber = 0;
  long startTime = System.nanoTime();
  try {
    // Set up the threadpool
    MulticoreWrapper<String,String> wrapper =
        new MulticoreWrapper<String,String>(nThreads,
            new ThreadsafeProcessor<String,String>() {
              @Override
              public String process(String input) {
                // Label each character, convert the labels back to tokens, then join.
                List<CoreLabel> labeledSeq = ProcessorTools.toCharacterSequence(input);
                labeledSeq = postProcessor.classifier.classify(labeledSeq);
                List<CoreLabel> tokenSeq = ProcessorTools.toPostProcessedSequence(labeledSeq);
                return Sentence.listToString(tokenSeq);
              }
              @Override
              public ThreadsafeProcessor<String, String> newInstance() {
                return this;
              }
            });
    // Read the input
    for (String line; (line = reader.readLine()) != null; ++lineNumber) {
      numChars += line.length();
      wrapper.put(line.trim());
      while (wrapper.peek()) outstream.println(wrapper.poll());
    }
    wrapper.join();
    while (wrapper.peek()) outstream.println(wrapper.poll());
  } catch (IOException e) {
    System.err.printf("%s: Error at input line %d%n", CRFPostprocessor.class.getName(), lineNumber);
    e.printStackTrace();
  }
  // Calculate throughput
  double elapsedTime = ((double) System.nanoTime() - startTime) / 1e9;
  double charsPerSecond = (double) numChars / elapsedTime;
  return charsPerSecond;
}
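A hypothetical wiring of decode over stdin/stdout, assuming postProcessor is an already-trained CRFPostprocessor instance:

BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
PrintWriter writer = new PrintWriter(new OutputStreamWriter(System.out), true);
double charsPerSec = decode(postProcessor, reader, writer, 4);
System.err.printf("Throughput: %.2f chars/sec%n", charsPerSec);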
Example 11: get
import edu.stanford.nlp.ling.Sentence; // import the package/class on which the method depends

/**
 * Map the input word to a word class.
 *
 * @param word the word to look up
 * @return the word's class (or, when several mappings are loaded, the
 *         delimiter-joined combination of its classes)
 */
public IString get(IString word) {
  List<IString> classList = getList(word);
  return numMappings == 1 ? classList.get(0) : new IString(Sentence.listToString(classList, true, DELIMITER));
}