本文整理汇总了 Java 中 edu.stanford.nlp.ling.Document 类的典型用法代码示例。如果您正苦于以下问题：Java Document 类的具体用法？Java Document 怎么用？Java Document 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Document 类属于 edu.stanford.nlp.ling 包，在下文中一共展示了 Document 类的 7 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: main
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * Runs the escaping on an input file or URL and prints each resulting token
 * on its own line. The input must already be tokenized, with tokens separated
 * by whitespace. <br>
 * Usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl
 *
 * @param args Command line argument: a file or URL. An argument that starts
 *             with {@code http://} or {@code https://} is opened as a URL and
 *             passed through a {@code StripTagsProcessor} first; anything
 *             else is opened as a local file.
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.out.println("usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl");
    // Return instead of System.exit so that a programmatic caller's JVM is not terminated.
    return;
  }
  String filename = args[0];
  // Accept https as well as http; an https address previously fell through
  // to the local-file branch and failed as a missing file.
  boolean isUrl = filename.startsWith("http://") || filename.startsWith("https://");
  try {
    Document<String, Word, Word> d; // initialized below
    if (isUrl) {
      Document<String, Word, Word> dpre = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new URL(filename));
      DocumentProcessor<Word, Word, String, Word> notags = new StripTagsProcessor<String, Word>();
      d = notags.processDocument(dpre);
    } else {
      d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename));
    }
    DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<Word, String, Word>();
    Document<String, Word, HasWord> newD = proc.processDocument(d);
    for (HasWord word : newD) {
      System.out.println(word);
    }
  } catch (Exception e) {
    // Command-line boundary: report any failure (bad path, network, parsing) and finish.
    e.printStackTrace();
  }
}
示例2: main
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * Runs the escaping on an input file or URL and prints each resulting token
 * on its own line. The input should already be tokenized, with tokens
 * separated by whitespace. <br>
 * Usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl
 *
 * @param args Command line argument: a file or URL. An argument that starts
 *             with {@code http://} or {@code https://} is opened as a URL and
 *             passed through a {@code StripTagsProcessor} first; anything
 *             else is opened as a local file.
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.out.println("usage: java edu.stanford.nlp.process.PTBEscapingProcessor fileOrUrl");
    return;
  }
  String filename = args[0];
  // Accept https as well as http; an https address previously fell through
  // to the local-file branch and failed as a missing file.
  boolean isUrl = filename.startsWith("http://") || filename.startsWith("https://");
  try {
    Document<String, Word, Word> d; // initialized below
    if (isUrl) {
      Document<String, Word, Word> dpre = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new URL(filename));
      DocumentProcessor<Word, Word, String, Word> notags = new StripTagsProcessor<String, Word>();
      d = notags.processDocument(dpre);
    } else {
      d = new BasicDocument<String>(WhitespaceTokenizer.factory()).init(new File(filename));
    }
    DocumentProcessor<Word, HasWord, String, Word> proc = new PTBEscapingProcessor<Word, String, Word>();
    Document<String, Word, HasWord> newD = proc.processDocument(d);
    for (HasWord word : newD) {
      System.out.println(word);
    }
  } catch (Exception e) {
    // Command-line boundary: report any failure (bad path, network, parsing) and finish.
    e.printStackTrace();
  }
}
示例3: main
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * For internal debugging purposes only. Builds a document from a fixed HTML
 * snippet, runs it through a {@code StripTagsProcessor} and then a
 * {@code WordToSentenceProcessor}, and prints the document after each stage.
 *
 * @param args not used
 */
public static void main(String[] args) {
  // The instance created here is discarded.
  // NOTE(review): looks like a leftover statement - confirm before removing.
  new BasicDocument<String>();

  final String markup = "top text <h1>HEADING text</h1> this is <p>new paragraph<br>next line<br/>xhtml break etc.";
  Document<String, Word, Word> original = BasicDocument.init(markup);
  System.out.println("Before:");
  System.out.println(original);

  StripTagsProcessor<String, Word> tagStripper = new StripTagsProcessor<String, Word>(true);
  Document<String, Word, Word> stripped = tagStripper.processDocument(original);
  System.out.println("After:");
  System.out.println(stripped);

  WordToSentenceProcessor<Word> sentenceSplitter = new WordToSentenceProcessor<Word>();
  Document<String, Word, List<Word>> bySentence = sentenceSplitter.processDocument(stripped);
  System.out.println("Sentences:");
  System.out.println(bySentence);
}
示例4: main
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * This will print out some text, recognizing tags. It can be used to
 * test tag breaking. Each element of the processed document is printed on
 * its own line, prefixed with its zero-based position. <br> Usage: <code>
 * java edu.stanford.nlp.process.WordToTaggedWordProcessor fileOrUrl
 * </code>
 *
 * @param args Command line argument: a file or URL. An argument that starts
 *             with {@code http://} or {@code https://} is opened as a URL and
 *             passed through a {@code StripTagsProcessor} first; anything
 *             else is opened as a local file.
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.out.println("usage: java edu.stanford.nlp.process.WordToTaggedWordProcessor fileOrUrl");
    // Return instead of System.exit so that a programmatic caller's JVM is not terminated.
    return;
  }
  String filename = args[0];
  // Accept https as well as http; an https address previously fell through
  // to the local-file branch and failed as a missing file.
  boolean isUrl = filename.startsWith("http://") || filename.startsWith("https://");
  try {
    Document<HasWord, Word, Word> d;
    if (isUrl) {
      Document<HasWord, Word, Word> dpre = new BasicDocument<HasWord>().init(new URL(filename));
      DocumentProcessor<Word, Word, HasWord, Word> notags = new StripTagsProcessor<HasWord, Word>();
      d = notags.processDocument(dpre);
    } else {
      d = new BasicDocument<HasWord>().init(new File(filename));
    }
    DocumentProcessor<Word, HasWord, HasWord, Word> proc = new WordToTaggedWordProcessor<Word, HasWord, Word>();
    Document<HasWord, Word, HasWord> sentd = proc.processDocument(d);
    int i = 0;
    for (HasWord w : sentd) {
      System.out.println(i + ": " + w);
      i++;
    }
  } catch (Exception e) {
    // Command-line boundary: report any failure (bad path, network, parsing) and finish.
    e.printStackTrace();
  }
}
示例5: processDocument
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * Produces a new document from {@code in}: obtains a blank document from
 * {@code in.blankDocument()}, adds everything returned by
 * {@code process(in)} to it, and returns the result.
 *
 * @param in the document to process
 * @return the newly populated document
 */
public Document<L, F, OUT> processDocument(Document<L, F, IN> in) {
  // The blank document is requested before process(in) runs; keep this order.
  final Document<L, F, OUT> processed = in.blankDocument();
  processed.addAll(process(in));
  return processed;
}
示例6: processDocument
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * Produces a document of lists from {@code in}: obtains a blank document
 * from {@code in.blankDocument()}, adds every list returned by
 * {@code process(in)} to it, and returns the result.
 *
 * @param <L> first type parameter of the input document, carried over to the result
 * @param <F> second type parameter of the input document, carried over to the result
 * @param in the document to process
 * @return the newly populated document whose elements are {@code List<IN>}
 */
public <L, F> Document<L, F, List<IN>> processDocument(Document<L, F, IN> in) {
  // The blank document is requested before process(in) runs; keep this order.
  final Document<L, F, List<IN>> grouped = in.blankDocument();
  grouped.addAll(process(in));
  return grouped;
}
示例7: processDocument
import edu.stanford.nlp.ling.Document; //导入依赖的package包/类
/**
 * Converts a Document to a different Document, by transforming
 * or filtering the original Document. The general contract of this method
 * is to not modify the {@code in} Document in any way, and to
 * preserve the metadata of the {@code in} Document in the
 * returned Document.
 *
 * @param in the Document to convert; by contract it is not modified
 * @return the converted Document, carrying the metadata of {@code in}
 * @see FunctionProcessor
 */
public Document<L, F, OUT> processDocument(Document<L, F, IN> in);