This article collects typical usage examples of the Java class opennlp.tools.namefind.TokenNameFinderModel. If you have been wondering what exactly the TokenNameFinderModel class does, how to use it, or where to find working examples, the curated class code examples below may help.
The TokenNameFinderModel class belongs to the opennlp.tools.namefind package. A total of 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
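Before the individual examples, here is a minimal quick-start sketch of the typical load-and-use pattern for TokenNameFinderModel together with NameFinderME. The model path models/en-ner-person.bin and the sample sentence are illustrative assumptions, not taken from any example below.
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.Span;

public class TokenNameFinderModelQuickStart {
    public static void main(String[] args) throws Exception {
        // Load a serialized name finder model from disk (the path is an assumption).
        try (InputStream in = Files.newInputStream(Paths.get("models/en-ner-person.bin"))) {
            TokenNameFinderModel model = new TokenNameFinderModel(in);
            NameFinderME finder = new NameFinderME(model);
            // Tokenize the input, then locate name spans and print them.
            String[] tokens = SimpleTokenizer.INSTANCE.tokenize("Anna Smith lives in Berlin .");
            for (String name : Span.spansToStrings(finder.find(tokens), tokens)) {
                System.out.println(name);
            }
        }
    }
}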
Example 1: loadNameFinders
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
protected void loadNameFinders(String language, String modelDirectory) throws IOException {
    //<start id="maxent.examples.namefinder.setup"/>
    File modelFile;
    File[] models //<co id="nfe.findmodels"/>
        = findNameFinderModels(language, modelDirectory);
    modelNames = new String[models.length];
    finders = new NameFinderME[models.length];
    for (int fi = 0; fi < models.length; fi++) {
        modelFile = models[fi];
        modelNames[fi] = modelNameFromFile(language, modelFile); //<co id="nfe.modelname"/>
        log.info("Loading model {}", modelFile);
        // Close the model stream once the model has been read to avoid leaking file handles.
        try (InputStream modelStream = new FileInputStream(modelFile)) {
            TokenNameFinderModel model = //<co id="nfe.modelreader"/>
                new TokenNameFinderModel(modelStream);
            finders[fi] = new NameFinderME(model);
        }
    }
}
Example 2: buildModel
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Override
public void buildModel(String entityType) {
    try {
        System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
        System.out.println("\t\treading training data...");
        Charset charset = Charset.forName("UTF-8");
        ObjectStream<String> lineStream =
            new PlainTextByLineStream(new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile()), charset);
        ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
        TokenNameFinderModel model = NameFinderME.train("en", entityType, sampleStream, null);
        sampleStream.close();
        // try-with-resources guarantees the model file is flushed and closed.
        try (OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) {
            model.serialize(modelOut);
        }
        System.out.println("\tmodel generated");
    } catch (Exception e) {
        // Do not swallow training or serialization failures silently.
        e.printStackTrace();
    }
}
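The NameSampleDataStream used in Example 2 expects OpenNLP's plain-text name finder training format: one whitespace-tokenized sentence per line, entities wrapped in <START:type> ... <END> tags, and a blank line between documents. The two lines below are purely illustrative content for the file returned by params.getAnnotatedTrainingDataFile():
<START:person> John Smith <END> travelled to New York last week .
<START:person> Anne Green <END> met him at the airport .
(Since Example 2 trains a single entityType, only that tag type would appear in the real file.)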
Example 3: start
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
protected OpenNlpService start() {
    StopWatch sw = new StopWatch("models-loading");
    Map<String, String> settingsMap = IngestOpenNlpPlugin.MODEL_FILE_SETTINGS.getAsMap(settings);
    for (Map.Entry<String, String> entry : settingsMap.entrySet()) {
        String name = entry.getKey();
        sw.start(name);
        Path path = configDirectory.resolve(entry.getValue());
        try (InputStream is = Files.newInputStream(path)) {
            nameFinderModels.put(name, new TokenNameFinderModel(is));
        } catch (IOException e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("Could not load model [{}] with path [{}]", name, path), e);
        }
        sw.stop();
    }
    if (settingsMap.isEmpty()) {
        logger.error("Did not load any models for ingest-opennlp plugin, none configured");
    } else {
        logger.info("Read models in [{}] for {}", sw.totalTime(), settingsMap.keySet());
    }
    return this;
}
Example 4: find
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
public Set<String> find(String content, String field) {
    try {
        if (!nameFinderModels.containsKey(field)) {
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }
        TokenNameFinderModel finderModel = nameFinderModels.get(field);
        if (threadLocal.get() == null || !threadLocal.get().equals(finderModel)) {
            threadLocal.set(finderModel);
        }
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
        Span[] spans = new NameFinderME(finderModel).find(tokens);
        String[] names = Span.spansToStrings(spans, tokens);
        return Sets.newHashSet(names);
    } finally {
        threadLocal.remove();
    }
}
Example 5: testPersonNER
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Test
public void testPersonNER() throws Exception {
    URL modelUrl = Thread.currentThread().getContextClassLoader()
            .getResource("models/en-ner-persons.bin");
    assertThat(modelUrl, is(notNullValue()));
    TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
    assertThat(model, is(notNullValue()));
    NameFinderME nameFinder = new NameFinderME(model);
    String[] tokens = SimpleTokenizer.INSTANCE
            .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
    assertThat(tokens.length, is(15));
    Span[] spans = nameFinder.find(tokens);
    assertThat(spans.length, is(2));
    String[] names = Span.spansToStrings(spans, tokens);
    assertThat(names.length, is(2));
    assertThat(names[0], is("John Smith"));
    assertThat(names[1], is("Anne Green"));
}
Example 6: testLocationNER
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Test
public void testLocationNER() throws Exception {
    URL modelUrl = Thread.currentThread().getContextClassLoader()
            .getResource("models/en-ner-locations.bin");
    assertThat(modelUrl, is(notNullValue()));
    TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
    assertThat(model, is(notNullValue()));
    NameFinderME nameFinder = new NameFinderME(model);
    String[] tokens = SimpleTokenizer.INSTANCE
            .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
    assertThat(tokens.length, is(15));
    Span[] spans = nameFinder.find(tokens);
    assertThat(spans.length, is(2));
    String[] locations = Span.spansToStrings(spans, tokens);
    assertThat(locations.length, is(2));
    assertThat(locations[0], is("New York"));
    assertThat(locations[1], is("London"));
}
Example 7: testDateNER
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Test
public void testDateNER() throws Exception {
    URL modelUrl = Thread.currentThread().getContextClassLoader()
            .getResource("models/en-ner-dates.bin");
    assertThat(modelUrl, is(notNullValue()));
    TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
    assertThat(model, is(notNullValue()));
    NameFinderME nameFinder = new NameFinderME(model);
    String[] tokens = SimpleTokenizer.INSTANCE
            .tokenize("Mr. John Smith of New York, married Anne Green of London today.");
    assertThat(tokens.length, is(15));
    Span[] spans = nameFinder.find(tokens);
    assertThat(spans.length, is(1));
    String[] dates = Span.spansToStrings(spans, tokens);
    assertThat(dates.length, is(1));
    assertThat(dates[0], is("today"));
}
Example 8: testAddressNER
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Test
public void testAddressNER() throws Exception {
    URL modelUrl = Thread.currentThread().getContextClassLoader()
            .getResource("models/en-ner-address.bin");
    assertThat(modelUrl, is(notNullValue()));
    TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
    assertThat(model, is(notNullValue()));
    NameFinderME nameFinder = new NameFinderME(model);
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize("Send a taxi to 12 Pleasent Street");
    Span[] spans = nameFinder.find(tokens);
    assertThat(spans.length, is(1));
    String[] addresses = Span.spansToStrings(spans, tokens);
    assertThat(addresses.length, is(1));
    assertThat(addresses[0], is("12 Pleasent Street"));
}
Example 9: trainNameFinder
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
public static void trainNameFinder(final String inResource, String outFile) throws IOException {
    InputStreamFactory inputStreamFactory = new InputStreamFactory() {
        @Override
        public InputStream createInputStream() throws IOException {
            return Trainer.class.getResourceAsStream(inResource);
        }
    };
    NameSampleDataStream samples = new NameSampleDataStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8));
    TrainingParameters trainingParameters = new TrainingParameters();
    trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "5");
    trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "200");
    byte[] featureGeneratorBytes = null;
    Map<String, Object> resources = Collections.<String, Object>emptyMap();
    SequenceCodec<String> seqCodec = new BioCodec();
    TokenNameFinderFactory tokenNameFinderFactory = TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, seqCodec);
    TokenNameFinderModel model = NameFinderME.train("en", "person", samples, trainingParameters, tokenNameFinderFactory);
    //NameFinderME.train("en", "person", samples, Collections.<String, Object>emptyMap(), 200, 5);
    samples.close();
    // Serialize the trained model; try-with-resources closes the file even on failure.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        model.serialize(out);
    }
}
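A hedged usage sketch for Example 9: it assumes the Trainer class shown above, and the classpath resource name and output file name below are illustrative; the resource must contain data in the <START:person> ... <END> training format described earlier.
// Train a person-name model from a classpath resource and serialize it to disk
// (resource name and output file name are assumptions for illustration).
Trainer.trainNameFinder("/training/en-ner-person.train", "en-ner-person-custom.bin");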
Example 10: getAllNameEntitiesfromInput
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
public void getAllNameEntitiesfromInput(InputStream stream)
        throws InvalidFormatException, IOException {
    try (InputStream modelIn = new FileInputStream(nerModelPath)) {
        TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
        NameFinderME nameFinder = new NameFinderME(model);
        String[] in = IOUtils.toString(stream, "UTF-8").split(" ");
        Span[] nameE = nameFinder.find(in);
        // Strip the surrounding brackets added by Arrays.toString() before splitting the names.
        String spanNames = Arrays.toString(Span.spansToStrings(nameE, in));
        spanNames = spanNames.substring(1, spanNames.length() - 1);
        String[] tmp = spanNames.split(",");
        for (String name : tmp) {
            name = name.trim();
            this.locationNameEntities.add(name);
        }
    }
}
Example 11: names
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Nullable public static Map<String, String[]> names(String input) {
    NameFinderME[] finders = (NameFinderME[]) models.get(TokenNameFinderModel.class);
    String[] tokens = tokenizer().tokenize(input);
    Map<String, String[]> x = new HashMap<>(finders.length);
    for (NameFinderME m : finders) {
        Span[] ss = m.find(tokens);
        if (ss.length > 0)
            x.put(ss[0].getType(), Span.spansToStrings(ss, tokens));
    }
    if (!x.isEmpty()) {
        return x;
    } else {
        return null;
    }
}
Example 12: startStage
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Override
public void startStage(StageConfiguration config) {
    // parse the config to map the params properly
    textField = config.getProperty("textField", textField);
    peopleField = config.getProperty("peopleField", peopleField);
    posTextField = config.getProperty("posTextField", posTextField);
    try {
        // sentence detector
        SentenceModel sentModel = new SentenceModel(new FileInputStream(sentenceModelFile));
        sentenceDetector = new SentenceDetectorME(sentModel);
        // tokenizer
        TokenizerModel tokenModel = new TokenizerModel(new FileInputStream(tokenModelFile));
        tokenizer = new TokenizerME(tokenModel);
        // person name finder
        TokenNameFinderModel nameModel = new TokenNameFinderModel(new FileInputStream(personModelFile));
        nameFinder = new NameFinderME(nameModel);
        // part-of-speech tagger
        posTagger = new POSTaggerME(new POSModel(new FileInputStream(posModelFile)));
    } catch (IOException e) {
        // Report model-loading failures at error level rather than info.
        log.error("Error loading up OpenNLP Models. {}", e.getLocalizedMessage());
        e.printStackTrace();
    }
}
Example 13: tokenize
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
public Map<String, Set<String>> tokenize(String content) {
    Map<String, Set<String>> namedEntities = Maps.newHashMap();
    List<TextAnnotation> allTextAnnotations = new ArrayList<TextAnnotation>();
    String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
    for (Map.Entry<String, TokenNameFinderModel> finderEntry : finders.entrySet()) {
        String type = finderEntry.getKey();
        NameFinderME finder = new NameFinderME(finderEntry.getValue());
        Span[] spans = finder.find(tokens);
        double[] probs = finder.probs(spans);
        for (int ni = 0; ni < spans.length; ni++) {
            allTextAnnotations.add(new TextAnnotation(type, spans[ni], probs[ni]));
        }
    }
    if (!allTextAnnotations.isEmpty()) {
        removeConflicts(allTextAnnotations);
    }
    convertTextAnnotationsToNamedEntities(tokens, allTextAnnotations, namedEntities);
    return namedEntities;
}
Example 14: initNameFinder
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
private void initNameFinder() {
    try (InputStream modelIn = NlpVerticle.class.getResourceAsStream("/nlp/en-ner-person.bin")) {
        TokenNameFinderModel model = new TokenNameFinderModel(modelIn);
        nameFinder = new NameFinderME(model);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 15: OpenNlpNameFinder
import opennlp.tools.namefind.TokenNameFinderModel; // import the required package/class
@Inject
public OpenNlpNameFinder(
        @Model(LOCATION_MODEL) Resource location,
        @Model(ORGANIZATION_MODEL) Resource org,
        @Model(PERSON_MODEL) Resource person) {
    try {
        models[0] = new TokenNameFinderModel(location.binaryRead());
        models[1] = new TokenNameFinderModel(org.binaryRead());
        models[2] = new TokenNameFinderModel(person.binaryRead());
    } catch (IOException e) {
        throw new IOError(e);
    }
}