This article collects typical usage examples of the Java class edu.stanford.nlp.process.CoreLabelTokenFactory. If you are wondering what CoreLabelTokenFactory is, what it is for, or how to use it, the curated class examples below may help.
CoreLabelTokenFactory belongs to the edu.stanford.nlp.process package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java examples.
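Before the individual examples, here is a minimal self-contained sketch of the most common pattern: pairing CoreLabelTokenFactory with PTBTokenizer so that each token comes back as a CoreLabel carrying its text and character offsets. The class name and sentence are illustrative only, not taken from the examples below.

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;

import java.io.StringReader;

public class TokenizeSketch {
    public static void main(String[] args) {
        PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<>(
                new StringReader("CoreLabelTokenFactory builds CoreLabel tokens."),
                new CoreLabelTokenFactory(), // the factory that produces CoreLabel tokens
                "");                         // empty options string: default PTB tokenization
        for (CoreLabel token : tokenizer.tokenize()) {
            // each CoreLabel knows its text and its character span in the input
            System.out.println(token.word() + " [" + token.beginPosition() + ", " + token.endPosition() + ")");
        }
    }
}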
Example 1: initialize

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

/**
 * Initializes the tokenizer to detect date columns.
 */
public void initialize() {
    Properties props = new Properties();
    pipeline.addAnnotator(new TokenizerAnnotator(false) {
        @Override
        public Tokenizer<CoreLabel> getTokenizer(Reader r) {
            // tokenize with PTB conventions, emitting CoreLabel tokens
            return new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), "");
        }
    });
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
    pipeline.addAnnotator(new POSTaggerAnnotator(false));
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));
}
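Example 1 only wires up a SUTime-capable pipeline; running it is not shown. A hedged usage sketch, assuming pipeline is an edu.stanford.nlp.pipeline.AnnotationPipeline field, following the standard SUTime pattern of setting a document date and reading back TimexAnnotations:

// Usage sketch (assumes `pipeline` is an AnnotationPipeline field, as in the class above)
Annotation doc = new Annotation("The meeting moved from Tuesday to next Friday.");
doc.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14"); // reference date for resolving relative times
pipeline.annotate(doc);
for (CoreMap cm : doc.get(TimeAnnotations.TimexAnnotations.class)) {
    // each CoreMap is one detected time expression
    System.out.println(cm + " -> " + cm.get(TimeExpression.Annotation.class).getTemporal());
}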
Example 2: AbstractSequenceClassifier

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

/**
 * Initialize the featureFactory and other variables based on the passed-in
 * flags.
 *
 * @param flags A specification of the AbstractSequenceClassifier to construct.
 */
public AbstractSequenceClassifier(SeqClassifierFlags flags) {
    this.flags = flags;
    // MetaClass instantiates the factories by reflection from their class names
    this.featureFactory = new MetaClass(flags.featureFactory).createInstance(flags.featureFactoryArgs);
    if (flags.tokenFactory == null) {
        // default to CoreLabelTokenFactory when no token factory is configured
        tokenFactory = (CoreTokenFactory<IN>) new CoreLabelTokenFactory();
    } else {
        this.tokenFactory = new MetaClass(flags.tokenFactory).createInstance(flags.tokenFactoryArgs);
    }
    pad = tokenFactory.makeToken();
    windowSize = flags.maxLeft + 1;
    reinit();
}
Example 3: writeImage

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static void writeImage(String sentence, String outFile, int scale) throws Exception {
    LexicalizedParser lp = null;
    try {
        lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    } catch (Exception e) {
        System.err.println("Could not load file englishPCFG.ser.gz. Try placing this file in the same directory as Dependencee.jar");
        return;
    }
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    writeImage(tree, outFile, scale);
}
Example 4: testWriteImage

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

/**
 * Test of the writeImage method of class Main.
 */
@Test
public void testWriteImage() throws Exception {
    String text = "A quick brown fox jumped over the lazy dog.";
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    LexicalizedParser lp = LexicalizedParser.loadModel();
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(text)).tokenize();
    Tree tree = lp.apply(wordList);
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    Main.writeImage(tdl, "image.png", 3);
    assert (new File("image.png").exists());
}
Example 5: main

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    // untokenizable=noneKeep: keep untokenizable characters without logging any
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }
    pw.close();
}
Example 6: getTokenizer

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

private static Tokenizer<CoreLabel> getTokenizer(String text, String language) {
    if (language.equals("Characterwise")) {
        return new CharacterwiseTokenizer(text, new CoreLabelTokenFactory());
    } else {
        StringReader reader = new StringReader(text);
        TokenizerAnnotator tok = new TokenizerAnnotator(VERBOSE, language, OPTIONS);
        return tok.getTokenizer(reader);
    }
}
Example 7: produceBagOfWords_Token

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

/**
 * Loads a document from file and transforms it into a token multiset using the
 * Stanford PTBTokenizer.
 * @param documentPath path of the document to tokenize
 * @return a multiset mapping each token to its frequency
 * @throws IOException
 */
public HashMultiset<String> produceBagOfWords_Token(String documentPath) throws IOException {
    HashMultiset<String> tokenMultiset = HashMultiset.create();
    PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new FileReader(documentPath),
            new CoreLabelTokenFactory(), "");
    while (ptbt.hasNext()) {
        CoreLabel label = ptbt.next();
        tokenMultiset.add(label.toString());
    }
    return tokenMultiset;
}
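Because the return type is a Guava HashMultiset, token frequencies come for free. A short usage sketch; BagOfWordsBuilder is a hypothetical name for the enclosing class:

// `BagOfWordsBuilder` is a hypothetical name for the class containing produceBagOfWords_Token.
HashMultiset<String> bag = new BagOfWordsBuilder().produceBagOfWords_Token("doc.txt");
for (Multiset.Entry<String> e : bag.entrySet()) {
    System.out.println(e.getElement() + "\t" + e.getCount()); // each token with its frequency
}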
Example 8: tokenize

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public String[] tokenize(String s) {
    // re-attach soft-hyphen line breaks: drop the space after U+00AD
    s = s.replaceAll("\u00ad ", "\u00ad");
    // ptb3Escaping=false keeps raw quotes and brackets; invertible=false skips original-text bookkeeping
    PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(
            new StringReader(s),
            new CoreLabelTokenFactory(),
            "invertible=false,ptb3Escaping=false");
    List<CoreLabel> words = tokenizer.tokenize();
    String[] result = new String[words.size()];
    for (int i = 0; i < words.size(); i++)
        result[i] = words.get(i).toString();
    return result;
}
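To see what the options buy: with ptb3Escaping=false the tokenizer keeps raw quotes and brackets instead of rewriting them to PTB escapes such as `` or -LRB-. A hedged illustration; SimpleTokenizer is a hypothetical name for the enclosing class:

// `SimpleTokenizer` is a hypothetical name for the class containing tokenize(String).
String[] toks = new SimpleTokenizer().tokenize("He said, \"go (now)\".");
System.out.println(String.join(" ", toks)); // expected along the lines of: He said , " go ( now ) " .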
Example 9: main

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }
    // Print the adjectives in one more sentence. This shows how to get at words
    // and tags in a tagged sentence.
    List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
        if (tw.tag().startsWith("JJ")) {
            pw.println(tw.word());
        }
    }
    pw.close();
}
Example 10: demoAPI

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

/**
 * demoAPI demonstrates other ways of calling the parser with already
 * tokenized text, or in some cases, raw text that needs to be tokenized as
 * a single sentence. Output is handled with a TreePrint object. Note that
 * the options used when creating the TreePrint can determine what results
 * to print out. Once again, one can capture the output by passing a
 * PrintWriter to TreePrint.printTree.
 */
public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words
    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();

    // This option shows loading and using an explicit tokenizer
    String sent2 = "Hey @Apple, pretty much all your products are amazing. You blow minds every time you launch a new gizmo."
            + " that said, your hold music is crap";
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(
            new CoreLabelTokenFactory(), "");
    Tokenizer<CoreLabel> tok = tokenizerFactory
            .getTokenizer(new StringReader(sent2));
    List<CoreLabel> rawWords2 = tok.tokenize();
    parse = lp.apply(rawWords2);

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();

    // You can also use a TreePrint object to print trees and dependencies
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
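To run this demo, only a parser model is needed. A minimal hedged driver:

// Minimal driver sketch for the demo above.
LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
demoAPI(lp);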
Example 11: tokenizeStanford

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static String tokenizeStanford(String line) {
    StringBuilder str = new StringBuilder();
    // PTBTokenizer options are comma-separated: normalize quotes to ASCII, delete untokenizable characters
    Tokenizer<CoreLabel> tokenizer = new PTBTokenizer<>(new StringReader(line), new CoreLabelTokenFactory(), "asciiQuotes=true,untokenizable=allDelete");
    while (tokenizer.hasNext()) {
        CoreLabel label = tokenizer.next();
        // skip quote tokens and PTB bracket tokens such as -LRB- and -RRB-
        if (!label.toString().matches("``|''|\"|-[LR][RCR]B-"))
            str.append(label).append(" ");
    }
    return str.toString().trim();
}
Example 12: MUCMentionExtractor

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public MUCMentionExtractor(Dictionaries dict, Properties props, Semantics semantics) throws Exception {
    super(dict, semantics);
    String fileName = props.getProperty(Constants.MUC_PROP);
    fileContents = IOUtils.slurpFile(fileName);
    currentOffset = 0;
    tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(false), "");
    stanfordProcessor = loadStanfordProcessor(props);
}
Example 13: WhitespaceTokenizerAnnotator

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public WhitespaceTokenizerAnnotator(Properties props) {
    super(false);
    boolean eolIsSignificant =
            Boolean.valueOf(props.getProperty(EOL_PROPERTY, "false"));
    eolIsSignificant =
            (eolIsSignificant ||
             Boolean.valueOf(props.getProperty
                     (StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")));
    factory = new WhitespaceTokenizer.WhitespaceTokenizerFactory<CoreLabel>
            (new CoreLabelTokenFactory(), eolIsSignificant);
}
Example 14: getGraph

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static Graph getGraph(String sentence) throws Exception {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    lp.setOptionFlags(new String[]{"-maxLength", "500", "-retainTmpSubcategories"});
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> wordList = tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
    Tree tree = lp.apply(wordList);
    // gsf is a GrammaticalStructureFactory field defined elsewhere in this class
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
    Collection<TypedDependency> tdl = gs.typedDependencies();
    return getGraph(tree, tdl);
}
Example 15: demoAPI

import edu.stanford.nlp.process.CoreLabelTokenFactory; // import the dependent package/class

public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words
    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();

    // This option shows loading and using an explicit tokenizer
    String sent2 = "This is another sentence.";
    TokenizerFactory<CoreLabel> tokenizerFactory =
            PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    List<CoreLabel> rawWords2 =
            tokenizerFactory.getTokenizer(new StringReader(sent2)).tokenize();
    parse = lp.apply(rawWords2);

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();

    // A TreePrint object can also print trees and dependencies
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}