本文整理汇总了Java中opennlp.tools.util.Span.spansToStrings方法的典型用法代码示例。如果您正苦于以下问题:Java Span.spansToStrings方法的具体用法?Java Span.spansToStrings怎么用?Java Span.spansToStrings使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类opennlp.tools.util.Span的用法示例。
在下文中一共展示了Span.spansToStrings方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: interpret
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Classifies a query into an {@link Intent} and attaches the entities found in it.
 *
 * <p>The query is tokenized with {@code WhitespaceTokenizer}, categorized, and then every
 * configured name finder is run over the same tokens. Each detected span is stored in the
 * intent's entity map under the span's type; a later span of the same type replaces an
 * earlier one because the entries are written with {@code put}.
 *
 * @param query the raw query text
 * @return the intent for the best category, populated with the detected entities
 */
public Intent interpret(String query) {
    final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(query);
    final double[] scores = categorizer.categorize(words);
    logger.debug(categorizer.getAllResults(scores));

    final Intent result = new Intent(categorizer.getBestCategory(scores));
    for (NameFinderME finder : nameFinderMEs) {
        final Span[] found = finder.find(words);
        // spansToStrings returns one covered-text string per span, in the same order.
        final String[] texts = Span.spansToStrings(found, words);
        int idx = 0;
        for (Span span : found) {
            result.getEntities().put(span.getType(), texts[idx]);
            idx++;
        }
    }

    logger.debug(result.toString());
    return result;
}
示例2: getNamedEntity
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Converts the spans of a {@link TokenSpan} into {@link NamedEntity} objects.
 *
 * <p>Each entity carries the text covered by its span and the span's type string split on
 * the {@code |} character.
 *
 * @param tokenSpan the tokens together with the spans detected over them
 * @return one named entity per span, in span order
 */
public static NamedEntity[] getNamedEntity(TokenSpan tokenSpan) {
    final Span[] detected = tokenSpan.getSpans();
    final String[] coveredText = Span.spansToStrings(detected, tokenSpan.getTokens());

    final NamedEntity[] result = new NamedEntity[detected.length];
    int idx = 0;
    for (Span span : detected) {
        final NamedEntity namedEntity = new NamedEntity();
        namedEntity.setEntity(coveredText[idx]);
        // The pipe is escaped because split() takes a regular expression.
        namedEntity.setType(span.getType().split("\\|"));
        result[idx] = namedEntity;
        idx++;
    }
    return result;
}
示例3: find
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Extracts the distinct named entities of the given field type from a text.
 *
 * <p>The content is tokenized with {@code SimpleTokenizer} and run through a new
 * {@code NameFinderME} built from the model registered for {@code field}. The thread-local
 * model reference is always removed before returning, including when the field is unknown.
 *
 * @param content the text to analyse
 * @param field   the key of the name finder model to use
 * @return the set of distinct entity strings found in {@code content}
 * @throws ElasticsearchException if no model is registered for {@code field}
 */
public Set<String> find(String content, String field) {
    try {
        if (!nameFinderModels.containsKey(field)) {
            throw new ElasticsearchException("Could not find field [{}], possible values {}", field, nameFinderModels.keySet());
        }

        final TokenNameFinderModel model = nameFinderModels.get(field);
        final Object cached = threadLocal.get();
        final boolean alreadyCached = cached != null && cached.equals(model);
        if (!alreadyCached) {
            threadLocal.set(model);
        }

        final String[] words = SimpleTokenizer.INSTANCE.tokenize(content);
        final NameFinderME finder = new NameFinderME(model);
        final Span[] found = finder.find(words);
        return Sets.newHashSet(Span.spansToStrings(found, words));
    } finally {
        threadLocal.remove();
    }
}
示例4: testPersonNER
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Checks that the person model on the classpath finds exactly the two person names in the
 * sample sentence.
 */
@Test
public void testPersonNER() throws Exception {
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    final URL personModelUrl = loader.getResource("models/en-ner-persons.bin");
    assertThat(personModelUrl, is(notNullValue()));

    final TokenNameFinderModel personModel = new TokenNameFinderModel(personModelUrl);
    assertThat(personModel, is(notNullValue()));

    final NameFinderME finder = new NameFinderME(personModel);
    final String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
    final String[] words = SimpleTokenizer.INSTANCE.tokenize(sentence);
    assertThat(words.length, is(15));

    final Span[] found = finder.find(words);
    assertThat(found.length, is(2));

    final String[] people = Span.spansToStrings(found, words);
    assertThat(people.length, is(2));
    assertThat(people[0], is("John Smith"));
    assertThat(people[1], is("Anne Green"));
}
示例5: testLocationNER
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Checks that the location model on the classpath finds exactly the two locations in the
 * sample sentence.
 */
@Test
public void testLocationNER() throws Exception {
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    final URL locationModelUrl = loader.getResource("models/en-ner-locations.bin");
    assertThat(locationModelUrl, is(notNullValue()));

    final TokenNameFinderModel locationModel = new TokenNameFinderModel(locationModelUrl);
    assertThat(locationModel, is(notNullValue()));

    final NameFinderME finder = new NameFinderME(locationModel);
    final String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
    final String[] words = SimpleTokenizer.INSTANCE.tokenize(sentence);
    assertThat(words.length, is(15));

    final Span[] found = finder.find(words);
    assertThat(found.length, is(2));

    final String[] places = Span.spansToStrings(found, words);
    assertThat(places.length, is(2));
    assertThat(places[0], is("New York"));
    assertThat(places[1], is("London"));
}
示例6: testDateNER
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Checks that the date model on the classpath finds the single date expression in the
 * sample sentence.
 */
@Test
public void testDateNER() throws Exception {
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    final URL dateModelUrl = loader.getResource("models/en-ner-dates.bin");
    assertThat(dateModelUrl, is(notNullValue()));

    final TokenNameFinderModel dateModel = new TokenNameFinderModel(dateModelUrl);
    assertThat(dateModel, is(notNullValue()));

    final NameFinderME finder = new NameFinderME(dateModel);
    final String sentence = "Mr. John Smith of New York, married Anne Green of London today.";
    final String[] words = SimpleTokenizer.INSTANCE.tokenize(sentence);
    assertThat(words.length, is(15));

    final Span[] found = finder.find(words);
    assertThat(found.length, is(1));

    final String[] dates = Span.spansToStrings(found, words);
    assertThat(dates.length, is(1));
    assertThat(dates[0], is("today"));
}
示例7: testAddressNER
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Checks that the address model on the classpath finds the single street address in the
 * sample sentence.
 */
@Test
public void testAddressNER() throws Exception {
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    final URL addressModelUrl = loader.getResource("models/en-ner-address.bin");
    assertThat(addressModelUrl, is(notNullValue()));

    final TokenNameFinderModel addressModel = new TokenNameFinderModel(addressModelUrl);
    assertThat(addressModel, is(notNullValue()));

    final NameFinderME finder = new NameFinderME(addressModel);
    final String sentence = "Send a taxi to 12 Pleasent Street";
    final String[] words = SimpleTokenizer.INSTANCE.tokenize(sentence);

    final Span[] found = finder.find(words);
    assertThat(found.length, is(1));

    final String[] addresses = Span.spansToStrings(found, words);
    assertThat(addresses.length, is(1));
    assertThat(addresses[0], is("12 Pleasent Street"));
}
示例8: generateTokensFromSentence
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Builds the tokens of one sentence: tokenizes the sentence text, tags each token with its
 * part of speech, and wraps every token in a {@link TokenImpl}.
 *
 * @param sentenceRange
 *            The {@link TextRange} of the sentence to tokenize
 * @param paragraph
 *            The {@link ExtractedParagraph} the sentence is within
 * @param paragraphStartCharIndex
 *            The story character index of the beginning of the paragraph. Note that only paragraphs of
 *            {@link ParagraphType#TEXT} are counted when considering the overall story character index.
 * @return one token per generated text range, in sentence order
 */
private List<TokenImpl> generateTokensFromSentence(final TextRange sentenceRange, final ExtractedParagraph paragraph,
        final int paragraphStartCharIndex) {
    // The sentence range is expressed in story coordinates; shift it into paragraph coordinates.
    final String paragraphText = paragraph.getText();
    final int from = sentenceRange.getStartIndex() - paragraphStartCharIndex;
    final int to = sentenceRange.getEndIndex() - paragraphStartCharIndex;
    final String sentenceText = paragraphText.substring(from, to);

    // Tokenize, keeping the spans so that ranges can be derived from them below.
    final Span[] spans = this.tokenizer.tokenizePos(sentenceText);
    final String[] words = Span.spansToStrings(spans, sentenceText);

    // Part-of-speech tags are produced from the token strings, one tag per token.
    final String[] posTags = this.posTagger.tag(words);

    // Generate the ranges the tokens cover. This will make the tokens cover any white space
    // within the sentence as well.
    final List<TextRange> tokenRanges = generateTextRangesFromSpans(spans, sentenceRange);

    final List<TokenImpl> result = new ArrayList<>(tokenRanges.size());
    int idx = 0;
    for (final TextRange tokenRange : tokenRanges) {
        result.add(constructToken(tokenRange, words[idx], posTags[idx], paragraph, paragraphStartCharIndex));
        idx++;
    }
    return result;
}
示例9: findPeople
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Runs {@code nameDetector} over the paragraph text and returns the distinct matches.
 *
 * <p>The text is trimmed and split on runs of whitespace. Splitting on a single whitespace
 * character would turn consecutive or leading whitespace into empty-string tokens, which
 * would be passed to the name finder and show up as doubled spaces in the extracted names.
 *
 * @param paragraph the input whose text is analysed
 * @return one {@link Extraction} per distinct match, in no particular order; empty when the
 *         text contains no tokens
 */
public List<Extraction> findPeople(InputExtraction paragraph) {
    String text = paragraph.getText().trim();
    if (text.isEmpty()) {
        // Nothing to tokenize, so there can be no matches.
        return new ArrayList<Extraction>();
    }

    String[] tokens = text.split("\\s+");
    Span[] nameSpans = nameDetector.find(tokens);

    // Collected in a set to drop duplicates; NOTE(review): this presumably relies on
    // Extraction implementing equals/hashCode - confirm.
    Set<Extraction> people = new HashSet<Extraction>();
    for (String name : Span.spansToStrings(nameSpans, tokens)) {
        Extraction extraction = new Extraction();
        extraction.setExtraction(name);
        people.add(extraction);
    }
    return new ArrayList<Extraction>(people);
}
示例10: findPlaces
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Runs {@code locationDetector} over the paragraph text and returns the distinct matches.
 *
 * <p>The text is trimmed and split on runs of whitespace. Splitting on a single whitespace
 * character would turn consecutive or leading whitespace into empty-string tokens, which
 * would be passed to the name finder and show up as doubled spaces in the extracted names.
 *
 * @param paragraph the input whose text is analysed
 * @return one {@link Extraction} per distinct match, in no particular order; empty when the
 *         text contains no tokens
 */
public List<Extraction> findPlaces(InputExtraction paragraph) {
    String text = paragraph.getText().trim();
    if (text.isEmpty()) {
        // Nothing to tokenize, so there can be no matches.
        return new ArrayList<Extraction>();
    }

    String[] tokens = text.split("\\s+");
    Span[] placeSpans = locationDetector.find(tokens);

    // Collected in a set to drop duplicates; NOTE(review): this presumably relies on
    // Extraction implementing equals/hashCode - confirm.
    Set<Extraction> places = new HashSet<Extraction>();
    for (String place : Span.spansToStrings(placeSpans, tokens)) {
        Extraction extraction = new Extraction();
        extraction.setExtraction(place);
        places.add(extraction);
    }
    return new ArrayList<Extraction>(places);
}
示例11: findDates
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Runs {@code dateDetector} over the paragraph text and returns the distinct matches.
 *
 * <p>The text is trimmed and split on runs of whitespace. Splitting on a single whitespace
 * character would turn consecutive or leading whitespace into empty-string tokens, which
 * would be passed to the name finder and show up as doubled spaces in the extracted text.
 *
 * @param paragraph the input whose text is analysed
 * @return one {@link Extraction} per distinct match, in no particular order; empty when the
 *         text contains no tokens
 */
public List<Extraction> findDates(InputExtraction paragraph) {
    String text = paragraph.getText().trim();
    if (text.isEmpty()) {
        // Nothing to tokenize, so there can be no matches.
        return new ArrayList<Extraction>();
    }

    String[] tokens = text.split("\\s+");
    Span[] dateSpans = dateDetector.find(tokens);

    // Collected in a set to drop duplicates; NOTE(review): this presumably relies on
    // Extraction implementing equals/hashCode - confirm.
    Set<Extraction> dates = new HashSet<Extraction>();
    for (String date : Span.spansToStrings(dateSpans, tokens)) {
        Extraction extraction = new Extraction();
        extraction.setExtraction(date);
        dates.add(extraction);
    }
    return new ArrayList<Extraction>(dates);
}
示例12: parsePassageText
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Splits a passage into sentences and returns the top parse of each one.
 *
 * <p>For every sentence the method tokenizes it, prints each named entity found in it to
 * standard output, prints the space-joined tokens, and parses that token string.
 *
 * <p>Two defects of the earlier version are fixed here: the string handed to the parser was
 * built by joining the tokenizer object instead of the sentence's tokens, and entity
 * detection was re-run over all sentences once per sentence.
 *
 * @param p the passage text
 * @return the best parse of each detected sentence, in sentence order
 * @throws InvalidFormatException declared by the original signature; presumably raised while
 *         loading the models in {@code init()} - confirm
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException{
    if (!modelsAreInitialized) {
        init();
    }

    // initialize
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
    NameFinderME nameFinder = new NameFinderME(this.nerModel);
    Parser parser = ParserFactory.create(
            this.parserModel,
            20, // beam size
            0.95); // advance percentage
    // There are several tokenizers available. SimpleTokenizer works best
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;

    // find sentences, tokenize each, parse each, return top parse for each
    String[] sentences = sentenceDetector.sentDetect(p);
    Parse[] results = new Parse[sentences.length];
    for (int i = 0; i < sentences.length; i++) {
        String sentence = sentences[i];
        Span[] tokenSpans = tokenizer.tokenizePos(sentence);
        String[] tokens = Span.spansToStrings(tokenSpans, sentence);

        // Name spans index into the token array; map them back to character offsets so the
        // entity is printed exactly as it appears in the sentence.
        for (Span name : nameFinder.find(tokens)) {
            int nameStart = tokenSpans[name.getStart()].getStart();
            int nameEnd = tokenSpans[name.getEnd() - 1].getEnd();
            System.out.println(sentence.substring(nameStart, nameEnd));
        }

        // The parser is given the tokens separated by single spaces.
        String sent = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + sent);
        Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
        results[i] = sentResults[0];
    }
    return results;
}
示例13: getTaggedEntity
import opennlp.tools.util.Span; //导入方法依赖的package包/类
/**
 * Returns the text of every entity tagged in the given input.
 *
 * <p>The text is tagged via {@code getTags}, and each resulting span is turned into the
 * string of tokens it covers.
 *
 * @param text the text to tag
 * @return one string per tagged span, in span order
 */
public String[] getTaggedEntity(String text) {
    final TokenSpan tagged = getTags(text);
    final Span[] entitySpans = tagged.getSpans();
    final String[] words = tagged.getTokens();
    return Span.spansToStrings(entitySpans, words);
}