本文整理汇总了Java中opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE属性的典型用法代码示例。如果您正苦于以下问题:Java WhitespaceTokenizer.INSTANCE属性的具体用法?Java WhitespaceTokenizer.INSTANCE怎么用?Java WhitespaceTokenizer.INSTANCE使用的例子?那么,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类opennlp.tools.tokenize.WhitespaceTokenizer的用法示例。
在下文中一共展示了WhitespaceTokenizer.INSTANCE属性的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getNLPModel
/**
 * Trains a document-categorization model from the given training file.
 *
 * <p>The file is read line by line as UTF-8 and fed to
 * {@link DocumentCategorizerME#train} with a bag-of-words feature generator and the
 * whitespace tokenizer. Training uses a cutoff of 0 and 4000 iterations; the selected
 * algorithm name is printed to standard output. After training, the model is passed to
 * {@code evaluateDoccatModel} together with the same training file.
 *
 * @param openNLPTraining the file containing the training samples
 * @return the trained model
 * @throws IOException if the training file cannot be opened or read
 */
public static DoccatModel getNLPModel(File openNLPTraining) throws IOException {
    FeatureGenerator[] def = { new BagOfWordsFeatureGenerator() };
    WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
    DoccatFactory factory = new DoccatFactory(tokenizer, def);
    InputStreamFactory isf = new MarkableFileInputStreamFactory(openNLPTraining);

    DoccatModel model;
    // Close the streams once training is done so the handle on the training file is
    // released; the original code left them open.
    try (ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8");
         ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
        TrainingParameters params = TrainingParameters.defaultParams();
        System.out.println(params.algorithm());
        params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
        params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(4000));
        model = DocumentCategorizerME.train("en", sampleStream, params, factory);
    }

    evaluateDoccatModel(model, openNLPTraining);
    return model;
}
示例2: getIntentMatcher
/**
 * Builds the intent matcher for the taxi intents.
 *
 * <p>Registers, in order: OrderTaxi (with an Address slot), CancelTaxi, WhereTaxi,
 * GaveAddress (with an Address slot), Stop, Help and FavColor.
 *
 * @return the configured intent matcher
 */
@Override
public IntentMatcher getIntentMatcher() {
    // The models were built with the OpenNLP whitespace tokenizer, so use the same one here.
    final OpenNLPTokenizer whitespace = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);

    // Slot matching is done with OpenNLP NER.
    final OpenNLPSlotMatcher nerSlots = new OpenNLPSlotMatcher(whitespace);
    nerSlots.addSlotModel("Address", "models/en-ner-address.bin");

    final OpenNLPIntentMatcher intentMatcher =
        new OpenNLPIntentMatcher("models/en-cat-taxi-intents.bin", whitespace, nerSlots);

    final Intent orderTaxi = new Intent("OrderTaxi");
    orderTaxi.addSlot(new LiteralSlot("Address"));
    intentMatcher.addIntent(orderTaxi);

    intentMatcher.addIntent(new Intent("CancelTaxi"));
    intentMatcher.addIntent(new Intent("WhereTaxi"));

    final Intent gaveAddress = new Intent("GaveAddress");
    gaveAddress.addSlot(new LiteralSlot("Address"));
    intentMatcher.addIntent(gaveAddress);

    // The remaining intents carry no slots.
    for (String slotlessName : new String[] { "Stop", "Help", "FavColor" }) {
        intentMatcher.addIntent(new Intent(slotlessName));
    }

    return intentMatcher;
}
示例3: getIntentMatcher
/**
 * Builds the intent matcher for the taxi intents.
 *
 * <p>Registers, in order: OrderTaxi (with an Address slot), CancelTaxi, WhereTaxi and
 * GaveAddress (with an Address slot).
 *
 * @return the configured intent matcher
 */
@Override
public IntentMatcher getIntentMatcher() {
    // The models were built with the OpenNLP whitespace tokenizer, so use the same one here.
    final OpenNLPTokenizer whitespace = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);

    // Slot matching is done with OpenNLP NER.
    final OpenNLPSlotMatcher addressSlots = new OpenNLPSlotMatcher(whitespace);
    addressSlots.addSlotModel("Address", "models/en-ner-address.bin");

    final OpenNLPIntentMatcher result =
        new OpenNLPIntentMatcher("models/en-cat-taxi-intents.bin", whitespace, addressSlots);

    final Intent order = new Intent("OrderTaxi");
    order.addSlot(new LiteralSlot("Address"));
    result.addIntent(order);

    final Intent cancel = new Intent("CancelTaxi");
    result.addIntent(cancel);

    final Intent where = new Intent("WhereTaxi");
    result.addIntent(where);

    final Intent address = new Intent("GaveAddress");
    address.addSlot(new LiteralSlot("Address"));
    result.addIntent(address);

    return result;
}
示例4: getTokenizer
/**
 * Returns the tokenizer, resolving and caching it in the {@code tokenizer} field on first use.
 *
 * <p>If an artifact provider is present and its manifest names a tokenizer class under
 * {@code TOKENIZER_NAME}, that class is instantiated through {@link ExtensionLoader}.
 * Otherwise {@link WhitespaceTokenizer#INSTANCE} is used.
 *
 * @return the cached or newly resolved tokenizer
 */
public Tokenizer getTokenizer() {
    if (this.tokenizer != null) {
        return this.tokenizer;
    }

    Tokenizer candidate = null;
    if (artifactProvider != null) {
        String tokenizerClass = artifactProvider.getManifestProperty(TOKENIZER_NAME);
        if (tokenizerClass != null) {
            candidate = ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizerClass);
        }
    }

    // Fall back to whitespace tokenization when nothing could be loaded from the artifact provider.
    this.tokenizer = (candidate != null) ? candidate : WhitespaceTokenizer.INSTANCE;
    return this.tokenizer;
}
示例5: testCategorization
/**
 * Loads the taxi intent model from the classpath and checks that a set of sample
 * utterances, tokenized on whitespace, are assigned the expected categories.
 */
@Test
public void testCategorization() throws Exception {
    final URL modelLocation = Thread.currentThread().getContextClassLoader()
        .getResource("models/en-cat-taxi-intents.bin");
    assertThat(modelLocation, is(notNullValue()));

    final DoccatModel doccatModel = new DoccatModel(modelLocation);
    assertThat(doccatModel, is(notNullValue()));

    final DocumentCategorizerME categorizer = new DocumentCategorizerME(doccatModel);

    // The model was built with the OpenNLP whitespace tokenizer.
    final OpenNLPTokenizer whitespace = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);

    // Each row is { utterance, expected category }.
    final String[][] expectations = {
        { "Order me a taxi", "OrderTaxi" },
        { "Send me a taxi", "OrderTaxi" },
        { "Send a taxi to 12 Pleasent Street", "OrderTaxi" },
        { "Cancel my cab", "CancelTaxi" },
        { "Where is my taxi ?", "WhereTaxi" },
        { "The address is 136 River Road", "GaveAddress" },
    };

    for (String[] expectation : expectations) {
        String best = categorizer.getBestCategory(
            categorizer.categorize(whitespace.tokenize(expectation[0])));
        assertThat(best, is(notNullValue()));
        assertThat(best, is(expectation[1]));
    }
}
示例6: getTokenizer
/**
 * Returns the tokenizer to use.
 *
 * <p>When the {@code tokenizer} field is set, its value is handed to
 * {@link ExtensionLoader#instantiateExtension} to create the tokenizer; otherwise
 * {@link WhitespaceTokenizer#INSTANCE} is returned.
 *
 * @return the instantiated extension tokenizer, or the whitespace tokenizer by default
 */
public Tokenizer getTokenizer() {
    return (tokenizer == null)
        ? WhitespaceTokenizer.INSTANCE
        : ExtensionLoader.instantiateExtension(Tokenizer.class, this.tokenizer);
}
示例7: createTokenizer
/**
 * Creates a tokenizer from the given extension name.
 *
 * @param tokenizer the value passed to {@link ExtensionLoader#instantiateExtension},
 *     or {@code null} to use the default
 * @return the instantiated tokenizer, or {@link WhitespaceTokenizer#INSTANCE} when
 *     {@code tokenizer} is {@code null}
 */
private static Tokenizer createTokenizer(String tokenizer) {
    if (tokenizer == null) {
        // Nothing configured: default to whitespace tokenization.
        return WhitespaceTokenizer.INSTANCE;
    } else {
        return ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizer);
    }
}
示例8: getTokenizer
/**
 * Resolves a tokenizer from the given extension name.
 *
 * @param tokenizer the value passed to {@link ExtensionLoader#instantiateExtension},
 *     or {@code null} to use the default
 * @return the instantiated tokenizer, or {@link WhitespaceTokenizer#INSTANCE} when
 *     {@code tokenizer} is {@code null}
 */
private Tokenizer getTokenizer(String tokenizer) {
    // Start from the whitespace default and replace it only when a name was supplied.
    Tokenizer resolved = WhitespaceTokenizer.INSTANCE;
    if (tokenizer != null) {
        resolved = ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizer);
    }
    return resolved;
}
示例9: AuthorAgeSampleStream
/**
 * Creates a sample stream over the given string stream and sets the tokenizer
 * to {@link WhitespaceTokenizer#INSTANCE}.
 *
 * @param samples the underlying stream, passed on to the superclass constructor
 */
public AuthorAgeSampleStream(ObjectStream<String> samples) {
    super(samples);

    tokenizer = WhitespaceTokenizer.INSTANCE;
}