本文整理汇总了Java中opennlp.tools.tokenize.WhitespaceTokenizer类的典型用法代码示例。如果您正苦于以下问题:Java WhitespaceTokenizer类的具体用法?Java WhitespaceTokenizer怎么用?Java WhitespaceTokenizer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
WhitespaceTokenizer类属于opennlp.tools.tokenize包,在下文中一共展示了WhitespaceTokenizer类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: interpret
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Classifies a raw user query into an {@link Intent} and attaches any named
 * entities found by the configured NER models as intent entities.
 *
 * @param query raw user utterance; split on whitespace before scoring
 * @return the best-scoring intent, with entity type -> entity text mappings
 */
public Intent interpret(String query) {
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(query);
    double[] scores = categorizer.categorize(tokens);
    logger.debug(categorizer.getAllResults(scores));
    Intent intent = new Intent(categorizer.getBestCategory(scores));
    // run every registered name finder over the same token stream
    for (NameFinderME finder : nameFinderMEs) {
        Span[] entitySpans = finder.find(tokens);
        String[] entityTexts = Span.spansToStrings(entitySpans, tokens);
        int idx = 0;
        for (Span entitySpan : entitySpans) {
            intent.getEntities().put(entitySpan.getType(), entityTexts[idx++]);
        }
    }
    logger.debug(intent.toString());
    return intent;
}
示例2: getNLPModel
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Trains a document-categorizer model from a plain-text training file
 * (one UTF-8 sample per line), using whitespace tokenization and
 * bag-of-words features, then evaluates it against the same file.
 *
 * @param openNLPTraining training file in OpenNLP doccat line format
 * @return the trained model
 * @throws IOException if the training file cannot be read
 */
public static DoccatModel getNLPModel(File openNLPTraining) throws IOException {
    FeatureGenerator[] featureGenerators = { new BagOfWordsFeatureGenerator() };
    DoccatFactory factory = new DoccatFactory(WhitespaceTokenizer.INSTANCE, featureGenerators);
    InputStreamFactory inputStreamFactory = new MarkableFileInputStreamFactory(openNLPTraining);
    ObjectStream<DocumentSample> sampleStream =
        new DocumentSampleStream(new PlainTextByLineStream(inputStreamFactory, "UTF-8"));
    TrainingParameters trainingParams = TrainingParameters.defaultParams();
    System.out.println(trainingParams.algorithm());
    // keep every feature (cutoff 0) and train for many iterations
    trainingParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
    trainingParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(4000));
    DoccatModel model = DocumentCategorizerME.train("en", sampleStream, trainingParams, factory);
    evaluateDoccatModel(model, openNLPTraining);
    return model;
}
示例3: getIntentMatcher
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Builds the taxi-bot intent matcher: a doccat model for intent
 * classification plus an NER-based slot matcher for street addresses.
 * Registration order of the intents is significant and preserved.
 */
@Override
public IntentMatcher getIntentMatcher()
{
    // model was built with OpenNLP whitespace tokenizer
    OpenNLPTokenizer tokenizer = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);
    // use Open NLP NER for slot matching
    OpenNLPSlotMatcher slotMatcher = new OpenNLPSlotMatcher(tokenizer);
    slotMatcher.addSlotModel("Address", "models/en-ner-address.bin");
    // create intent matcher
    OpenNLPIntentMatcher matcher =
        new OpenNLPIntentMatcher("models/en-cat-taxi-intents.bin", tokenizer, slotMatcher);
    // register intents in model order; only the address-bearing ones get a slot
    for (String name : new String[] { "OrderTaxi", "CancelTaxi", "WhereTaxi",
                                      "GaveAddress", "Stop", "Help", "FavColor" })
    {
        Intent intent = new Intent(name);
        if ("OrderTaxi".equals(name) || "GaveAddress".equals(name))
        {
            intent.addSlot(new LiteralSlot("Address"));
        }
        matcher.addIntent(intent);
    }
    return matcher;
}
示例4: getIntentMatcher
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Builds the taxi-bot intent matcher (four core intents only): a doccat
 * model for intent classification plus an NER-based address slot matcher.
 * Registration order of the intents is significant and preserved.
 */
@Override
public IntentMatcher getIntentMatcher()
{
    // model was built with OpenNLP whitespace tokenizer
    OpenNLPTokenizer tokenizer = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);
    // use Open NLP NER for slot matching
    OpenNLPSlotMatcher slotMatcher = new OpenNLPSlotMatcher(tokenizer);
    slotMatcher.addSlotModel("Address", "models/en-ner-address.bin");
    // create intent matcher
    OpenNLPIntentMatcher matcher =
        new OpenNLPIntentMatcher("models/en-cat-taxi-intents.bin", tokenizer, slotMatcher);
    // register intents in model order; only the address-bearing ones get a slot
    for (String name : new String[] { "OrderTaxi", "CancelTaxi", "WhereTaxi", "GaveAddress" })
    {
        Intent intent = new Intent(name);
        if ("OrderTaxi".equals(name) || "GaveAddress".equals(name))
        {
            intent.addSlot(new LiteralSlot("Address"));
        }
        matcher.addIntent(intent);
    }
    return matcher;
}
示例5: getTokenizer
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Lazily resolves the tokenizer for this model. If the model's manifest
 * names a tokenizer class it is instantiated via the extension loader;
 * otherwise simple whitespace tokenization is used. The result is cached.
 *
 * @return the resolved tokenizer (never {@code null})
 */
public Tokenizer getTokenizer() {
    if (this.tokenizer == null) {
        Tokenizer loaded = null;
        if (artifactProvider != null) {
            String tokenizerClassName = artifactProvider.getManifestProperty(TOKENIZER_NAME);
            if (tokenizerClassName != null) {
                loaded = ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizerClassName);
            }
        }
        // fall back to whitespace tokenization when nothing was configured
        this.tokenizer = (loaded != null) ? loaded : WhitespaceTokenizer.INSTANCE;
    }
    return tokenizer;
}
示例6: read
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Reads the next profiler sample. Each sample line is whitespace-tokenized;
 * the first token is the gender, the second the age, and the remaining
 * tokens form the document text.
 *
 * @return the next sample, or {@code null} at end of stream
 * @throws IOException if a line has no document tokens (only gender/age or
 *         fewer), or if the underlying stream fails
 */
public ProfilerSample read() throws IOException {
    String sampleString = samples.read();
    if (sampleString != null) {
        // Whitespace tokenize entire string
        String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(sampleString);
        ProfilerSample sample;
        // need gender, age, and at least one document token
        // (was "> 1", which allowed gender/age-only lines through and then
        // overran docTokens in the arraycopy below)
        if (tokens.length > 2) {
            String gender = tokens[0];
            String age = tokens[1];
            String docTokens[] = new String[tokens.length - 2];
            // copy exactly the tokens.length - 2 document tokens
            // (was tokens.length - 1, which always threw ArrayIndexOutOfBoundsException)
            System.arraycopy(tokens, 2, docTokens, 0, tokens.length - 2);
            sample = new ProfilerSample(gender, age, docTokens);
        } else {
            throw new IOException(
                "Empty lines, or lines with only age and gender strings are not allowed!");
        }
        return sample;
    } else {
        return null;
    }
}
示例7: testCategorization
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Verifies that the trained taxi-intent model assigns the expected
 * category to a set of representative utterances.
 */
@Test
public void testCategorization()
    throws Exception
{
    URL modelUrl = Thread.currentThread().getContextClassLoader()
        .getResource("models/en-cat-taxi-intents.bin");
    assertThat(modelUrl, is(notNullValue()));
    DoccatModel model = new DoccatModel(modelUrl);
    assertThat(model, is(notNullValue()));
    DocumentCategorizerME myCategorizer = new DocumentCategorizerME(model);
    // model was built with OpenNLP whitespace tokenizer
    OpenNLPTokenizer tokenizer = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);
    // utterance -> expected category (checked in order)
    String[][] expectations = {
        { "Order me a taxi", "OrderTaxi" },
        { "Send me a taxi", "OrderTaxi" },
        { "Send a taxi to 12 Pleasent Street", "OrderTaxi" },
        { "Cancel my cab", "CancelTaxi" },
        { "Where is my taxi ?", "WhereTaxi" },
        { "The address is 136 River Road", "GaveAddress" },
    };
    for (String[] expectation : expectations)
    {
        String category = myCategorizer
            .getBestCategory(myCategorizer.categorize(tokenizer.tokenize(expectation[0])));
        assertThat(category, is(notNullValue()));
        assertThat(category, is(expectation[1]));
    }
}
示例8: getTokenizer
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Resolves the tokenizer to use: the configured tokenizer class name is
 * instantiated via the extension loader, otherwise whitespace tokenization
 * is the default.
 *
 * @return the resolved tokenizer (never {@code null})
 */
public Tokenizer getTokenizer() {
    return this.tokenizer == null
        ? WhitespaceTokenizer.INSTANCE
        : ExtensionLoader.instantiateExtension(Tokenizer.class, this.tokenizer);
}
示例9: createTokenizer
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Creates a tokenizer from the given class name via the extension loader,
 * falling back to whitespace tokenization when no name is given.
 *
 * @param tokenizer fully qualified tokenizer class name, or {@code null}
 * @return the resolved tokenizer (never {@code null})
 */
private static Tokenizer createTokenizer(String tokenizer) {
    return tokenizer == null
        ? WhitespaceTokenizer.INSTANCE
        : ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizer);
}
示例10: getTokenizer
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Resolves a tokenizer from the given class name via the extension loader,
 * falling back to whitespace tokenization when no name is given.
 *
 * @param tokenizer fully qualified tokenizer class name, or {@code null}
 * @return the resolved tokenizer (never {@code null})
 */
private Tokenizer getTokenizer(String tokenizer) {
    return tokenizer == null
        ? WhitespaceTokenizer.INSTANCE
        : ExtensionLoader.instantiateExtension(Tokenizer.class, tokenizer);
}
示例11: AuthorAgeSampleStream
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Creates a sample stream over raw text lines; each sample read from
 * {@code samples} will be split with simple whitespace tokenization.
 *
 * @param samples underlying stream of raw sample lines
 */
public AuthorAgeSampleStream(ObjectStream<String> samples) {
super(samples);
this.tokenizer = WhitespaceTokenizer.INSTANCE;
}
示例12: ProfilerSample
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Convenience constructor: whitespace-tokenizes {@code text} and delegates
 * to the token-array constructor.
 *
 * @param gender author gender label
 * @param age    author age label
 * @param text   raw document text, split on whitespace
 */
public ProfilerSample(String gender, String age, String text) {
this(gender, age, WhitespaceTokenizer.INSTANCE.tokenize(text));
}
示例13: read
import opennlp.tools.tokenize.WhitespaceTokenizer; //导入依赖的package包/类
/**
 * Reads the next brat annotation from the underlying reader. The line is
 * whitespace-tokenized; the annotation id's leading character selects the
 * parser type ('T' span, 'A' attribute, 'R' relation).
 *
 * @return the parsed annotation; {@code null} at end of stream or when the
 *         line has too few fields to be an annotation (preserves the
 *         original fall-through behavior)
 * @throws IOException on an unknown annotation type or a missing parser
 */
public BratAnnotation read() throws IOException {
    String line = reader.readLine();
    if (line == null) {
        return null; // end of document
    }
    String values[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
    if (values.length <= 2) {
        // too short to be an annotation line; original code silently
        // returned null here via its trailing return
        return null;
    }
    String id = values[BratAnnotationParser.ID_OFFSET];
    String typeClass;
    switch (id.charAt(0)) {
        case 'T':
            typeClass = SPAN_TYPE;
            break;
        case 'A':
            typeClass = ATTRIBUTE_TYPE;
            break;
        case 'R':
            typeClass = RELATION_TYPE;
            break;
        default:
            // fixed typo: was "unkown annotation type"
            throw new IOException("unknown annotation type");
    }
    BratAnnotationParser parser = parsers.get(typeClass);
    if (parser == null) {
        throw new IOException(
            "Failed to parse ann document with id " + id
            + " type class, no parser registered: "
            + values[BratAnnotationParser.TYPE_OFFSET]);
    }
    return parser.parse(values);
}