本文整理汇总了Java中opennlp.tools.doccat.DoccatModel类的典型用法代码示例。如果您正苦于以下问题：Java DoccatModel类的具体用法？Java DoccatModel怎么用？Java DoccatModel使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
DoccatModel类属于opennlp.tools.doccat包，在下文中一共展示了DoccatModel类的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getNLPModel
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Trains a document categorizer from the given training file and then evaluates
 * the resulting model against that same file.
 *
 * <p>Training uses a bag-of-words feature generator with the whitespace tokenizer,
 * a cutoff of 0 and 4000 iterations. The configured training algorithm name is
 * printed to standard output before training starts.
 *
 * @param openNLPTraining training data file, read as UTF-8
 * @return the trained model
 * @throws IOException if the training file cannot be read
 */
public static DoccatModel getNLPModel(File openNLPTraining) throws IOException {
    FeatureGenerator[] featureGenerators = { new BagOfWordsFeatureGenerator() };
    DoccatFactory factory = new DoccatFactory(WhitespaceTokenizer.INSTANCE, featureGenerators);
    InputStreamFactory isf = new MarkableFileInputStreamFactory(openNLPTraining);

    DoccatModel model;
    // Both streams are closed once training finishes (or fails) so the file handle is released.
    try (ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8");
         ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
        TrainingParameters params = TrainingParameters.defaultParams();
        System.out.println(params.algorithm());
        params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
        params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(4000));
        model = DocumentCategorizerME.train("en", sampleStream, params, factory);
    }

    // Evaluation re-opens the training file itself, so it runs after the training streams are closed.
    evaluateDoccatModel(model, openNLPTraining);
    return model;
}
示例2: SentimentAnalyser
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Creates the analyser by loading the document categorization model from the
 * classpath resource named by {@code MODEL}.
 *
 * @throws IllegalStateException if reading the model fails with an {@link IOException}
 */
public SentimentAnalyser() {
    try (InputStream in = SentimentAnalyser.class.getResourceAsStream(MODEL)) {
        // The stream is only needed while the model is deserialized.
        categorizer = new DocumentCategorizerME(new DoccatModel(in));
    } catch (IOException ex) {
        LOGGER.error("an error occurred while getting categories", ex);
        throw new IllegalStateException(ex);
    }
}
示例3: main2
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Generates the training artefacts under {@code MODEL_FOLDER}: the Weka data set
 * is written to {@code comments.arff}, then an OpenNLP document categorizer is
 * trained and serialized to {@code open_comments.model}.
 *
 * <p>A failure while saving the OpenNLP model is reported on standard error and
 * not rethrown; failures in the earlier steps propagate to the caller.
 *
 * @param args not used
 * @throws IOException if the ARFF file cannot be written or the model cannot be trained
 */
public static void main2(String[] args) throws IOException {
    // Weka ARFF model
    Instances dataSet = getWekaDataSet();
    // try-with-resources closes (and thereby flushes) the writer even if write() throws.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(MODEL_FOLDER + "/comments.arff"))) {
        writer.write(dataSet.toString());
    }

    // OpenNLP model
    File traningFileNlp = new File(MODEL_FOLDER + "/openNPLTraining.txt");
    setupNLPTraning(traningFileNlp);
    DoccatModel model = getNLPModel(traningFileNlp);
    try (OutputStream modelOut =
            new BufferedOutputStream(new FileOutputStream(MODEL_FOLDER + "/open_comments.model"))) {
        model.serialize(modelOut);
    } catch (IOException e) {
        // Failed to save model
        e.printStackTrace();
    }
}
示例4: evaluateDoccatModel
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Evaluates the given model against the samples in the given file and prints the
 * evaluator to standard output.
 *
 * <p>An error listener and a fine-grained report listener are attached to the
 * evaluator as monitors.
 *
 * @param model           the model to evaluate
 * @param openNLPTraining file containing the evaluation samples, read as UTF-8
 * @throws IOException if the sample file cannot be read
 */
public static void evaluateDoccatModel(DoccatModel model, File openNLPTraining) throws IOException {
    InputStreamFactory isf = new MarkableFileInputStreamFactory(openNLPTraining);
    // Streams are closed as soon as evaluation is done so the file handle is not leaked.
    try (ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8");
         ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
        // Build the monitor array directly with its real element type instead of
        // copying a more loosely typed list into it.
        DoccatEvaluationMonitor[] monitors = {
            new DoccatEvaluationErrorListener(),
            new DoccatFineGrainedReportListener()
        };
        DocumentCategorizerEvaluator eval =
                new DocumentCategorizerEvaluator(new DocumentCategorizerME(model), monitors);
        eval.evaluate(sampleStream);
        System.out.println(eval);
    }
}
示例5: classify
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Categorizes the given text with the model stored at the path held in {@code bin}
 * and maps the best category to its {@code Type} value.
 *
 * <p>The model file is loaded on every call. The best category is logged at info level.
 *
 * @param summary the text to categorize
 * @return the type of the best category, or the type of
 *         {@code Type.SEM_CLASSIFICACAO} if the model could not be read
 */
public String classify(String summary) {
    InputStream modelStream = null;
    try {
        modelStream = new FileInputStream(Paths.get(bin).toFile());
        DocumentCategorizerME categorizer = new DocumentCategorizerME(new DoccatModel(modelStream));
        String bestCategory = categorizer.getBestCategory(categorizer.categorize(summary));
        LOGGER.info(bestCategory);
        return Type.valueOf(bestCategory).getType();
    } catch (IOException ex) {
        LOGGER.error(ExceptionUtils.getStackTrace(ex));
        return Type.SEM_CLASSIFICACAO.getType();
    } finally {
        // Only hand the stream to the close helper if it was actually opened.
        if (modelStream != null) {
            close(modelStream);
        }
    }
}
示例6: run
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Trains a document categorizer from the file at the path held in {@code train}
 * (read as UTF-8) and writes the serialized model to the path held in {@code bin}.
 *
 * <p>I/O failures are logged and not rethrown.
 */
@PostConstruct
@SuppressWarnings("deprecation")
public void run() {
    DoccatModel model = null;
    OutputStream modelOut = null;
    try {
        // Teach the machine: train the model from the sample file.
        InputStreamFactory dataIn = new MarkableFileInputStreamFactory(Paths.get(train).toFile());
        // Close the training streams once training is finished so the file handle is released.
        try (ObjectStream<String> lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
             ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
            model = DocumentCategorizerME.train("pt", sampleStream);
        }
        // Write out the file containing what it learned.
        modelOut = new BufferedOutputStream(new FileOutputStream(Paths.get(bin).toFile()));
        model.serialize(modelOut);
    } catch (IOException e) {
        LOGGER.error(ExceptionUtils.getStackTrace(e));
    } finally {
        if (Objects.nonNull(modelOut)) {
            closeOutputStream(modelOut);
        }
    }
}
示例7: OpenNLPIntentMatcher
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Creates an intent matcher backed by a document categoriser model loaded from a URL.
 * The tokenizer, slot matcher and both score thresholds are passed on to the superclass.
 *
 * @param intentModelUrl  URL of the document categoriser model file to load.
 * @param tokenizer       The tokenizer to use when tokenizing an utterance.
 * @param slotMatcher     The slot matcher to use to extract slots from the utterance.
 * @param minMatchScore   The minimum match score for an intent match to be considered good.
 * @param maybeMatchScore The maybe match score. Use -1 to disable maybe matching.
 * @throws IllegalArgumentException if loading the model fails for any reason; the
 *                                  original exception is attached as the cause.
 */
public OpenNLPIntentMatcher(URL intentModelUrl, Tokenizer tokenizer, SlotMatcher slotMatcher, float minMatchScore, float maybeMatchScore)
{
  super(tokenizer, slotMatcher, minMatchScore, maybeMatchScore);

  try
  {
    this.model = new DoccatModel(intentModelUrl);
  }
  catch (Exception loadFailure)
  {
    // Any load failure is reported as an invalid argument, with the cause preserved.
    throw new IllegalArgumentException("Unable to load intent model", loadFailure);
  }
}
示例8: testModelLoad
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Checks that the taxi intents model can be located on the classpath and loaded.
 */
@Test
public void testModelLoad()
  throws Exception
{
  ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
  URL resource = contextLoader.getResource("models/en-cat-taxi-intents.bin");
  assertThat(resource, is(notNullValue()));

  DoccatModel loaded = new DoccatModel(resource);
  assertThat(loaded, is(notNullValue()));
}
示例9: initialize
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Initializes the component and loads the document categorizer model from the
 * classpath resource named by the {@code MODEL_NAME_PARAM} configuration parameter.
 *
 * @param aContext the UIMA context supplying the configuration parameter
 * @throws ResourceInitializationException if reading the model fails with an {@link IOException}
 */
public void initialize(UimaContext aContext)
        throws ResourceInitializationException {
    super.initialize(aContext);
    String modelResource = (String) aContext.getConfigParameterValue(MODEL_NAME_PARAM);
    // The stream is only needed while the model is read; close it afterwards instead of leaking it.
    try (InputStream is = this.getClass().getResourceAsStream(modelResource)) {
        DoccatModel m = new DoccatModel(is);
        categorizer = new DocumentCategorizerME(m);
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
}
示例10: train
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Trains a document categorizer with default training parameters and a default
 * factory, then writes the serialized model to a file.
 *
 * @param file_train path of the training data file, read as UTF-8
 * @param file_model path of the file the serialized model is written to
 * @throws IOException if the training data cannot be read or the model cannot be written
 */
public static void train(String file_train, String file_model) throws IOException {
    DoccatModel model;
    // Close the training streams once training is finished so the file handle is released.
    try (ObjectStream<String> lineStream =
             new PlainTextByLineStream(new MarkableFileInputStreamFactory(
                 new File(file_train)), "UTF-8");
         ObjectStream<DocumentSample> sampleStream =
             new DocumentSampleStream(lineStream)) {
        TrainingParameters param = TrainingParameters.defaultParams();
        DoccatFactory factory = new DoccatFactory();
        model = DocumentCategorizerME.train("en", sampleStream, param, factory);
    }
    // The output stream was previously never closed; close it so the model file
    // is completely written and its handle released.
    try (FileOutputStream modelOut = new FileOutputStream(file_model)) {
        model.serialize(modelOut);
    }
}
示例11: classify
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Loads a document categorizer model from a file, categorizes the given text and
 * prints the result to standard output: one {@code category - score} line per
 * category, then the best category, then all results.
 *
 * @param modelFile path of the serialized model file
 * @param inputText the text to categorize
 * @throws InvalidFormatException declared by the original signature for an invalid model file
 * @throws IOException            if the model file cannot be read
 */
public static void classify(String modelFile, String inputText) throws InvalidFormatException, IOException {
    DoccatModel model;
    // The stream is only needed to load the model; close it right away instead of leaking it.
    try (InputStream modelIn = new FileInputStream(modelFile)) {
        model = new DoccatModel(modelIn);
    }
    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);
    double[] outcomes = categorizer.categorize(inputText);
    for (int i = 0; i < categorizer.getNumberOfCategories(); i++) {
        System.out.println(categorizer.getCategory(i) + " - " + outcomes[i]);
    }
    System.out.println(categorizer.getBestCategory(outcomes));
    System.out.println(categorizer.getAllResults(outcomes));
}
示例12: initModel
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Loads every classifier resource used by this class: the three regex score
 * lists, the OpenNLP document categorizer, the Weka NaiveBayes classifier and
 * the ARFF data set.
 *
 * @throws Exception if any of the resources cannot be loaded
 */
private void initModel() throws Exception {
    // Regex model
    regexClassifierHighScore = getRegexs("ini/regex_high_score.txt");
    regexClassifierMediumScore = getRegexs("ini/regex_medium_score.txt");
    regexClassifierLowScore = getRegexs("ini/regex_low_score.txt");

    // OpenNLP classifier
    DoccatModel m = new DoccatModel(new File("model/open_comments.model"));
    openNLPClassifier = new DocumentCategorizerME(m);

    // Weka NaiveBayes classifier. try-with-resources guarantees the file handle
    // is released even when deserialization fails.
    try (FileInputStream nbModelIn = new FileInputStream("model/nb_comments.model")) {
        wekaNBClassifier = (Classifier) SerializationHelper.read(nbModelIn);
    }

    // NOTE: the Weka SGD, J48 and SMO classifiers (model/sgd_comments.model,
    // model/j48_comments.model, model/smo_comments.model) and the serialized
    // StringToWordVector filter (model/StringToWordVector.filter) are not loaded;
    // the code that loaded them was previously disabled by commenting it out.

    // This needs to be removed; it is only used to copy the structure when classifying.
    wekaARFF = getInstancesFromARFF("model/comments.arff");
    wekaARFF.setClassIndex(wekaARFF.numAttributes() - 1);
}
示例13: testCategorization
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Checks that the taxi intents model assigns the expected category to a set of
 * sample utterances.
 */
@Test
public void testCategorization()
  throws Exception
{
  ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
  URL modelUrl = contextLoader.getResource("models/en-cat-taxi-intents.bin");
  assertThat(modelUrl, is(notNullValue()));

  DoccatModel model = new DoccatModel(modelUrl);
  assertThat(model, is(notNullValue()));

  DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

  // model was built with OpenNLP whitespace tokenizer
  OpenNLPTokenizer tokenizer = new OpenNLPTokenizer(WhitespaceTokenizer.INSTANCE);

  // Each row is { utterance, expected best category }, checked in this order.
  String[][] expectations = {
    { "Order me a taxi", "OrderTaxi" },
    { "Send me a taxi", "OrderTaxi" },
    { "Send a taxi to 12 Pleasent Street", "OrderTaxi" },
    { "Cancel my cab", "CancelTaxi" },
    { "Where is my taxi ?", "WhereTaxi" },
    { "The address is 136 River Road", "GaveAddress" }
  };

  for (String[] expectation : expectations)
  {
    String category = categorizer.getBestCategory(categorizer.categorize(tokenizer.tokenize(expectation[0])));
    assertThat(category, is(notNullValue()));
    assertThat(category, is(expectation[1]));
  }
}
示例14: getInstance
import opennlp.tools.doccat.DoccatModel; //导入依赖的package包/类
/**
 * Creates a language identifier whose document categorizer is backed by the
 * model stored in the classpath resource {@code /nakala/lang.model}.
 *
 * @return a new, initialized language identifier
 * @throws InvalidFormatException declared by the original signature for an invalid model resource
 * @throws IOException            if the model resource cannot be read
 */
public static LanguageIdentifier getInstance() throws InvalidFormatException, IOException {
    LanguageIdentifier li = new LanguageIdentifier();
    // Fully qualified so this block does not depend on an import it did not previously need.
    // The stream is closed once the model has been read instead of being leaked.
    try (java.io.InputStream modelIn = li.getClass().getResourceAsStream("/nakala/lang.model")) {
        DoccatModel model = new DoccatModel(modelIn);
        li.doccat = new DocumentCategorizerME(model);
    }
    return li;
}