本文整理汇总了Java中banner.tokenization.NaiveTokenizer类的典型用法代码示例。如果您正苦于以下问题:Java NaiveTokenizer类的具体用法?Java NaiveTokenizer怎么用?Java NaiveTokenizer使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
NaiveTokenizer类属于banner.tokenization包,在下文中一共展示了NaiveTokenizer类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testNo2
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Registers four dictionary entries, one of which does not occur in the
 * sentence, and checks that only the other three are tagged as mentions.
 */
@Test
public void testNo2() {
    NaiveTokenizer naive = new NaiveTokenizer();
    DictionaryTagger tagger = new DictionaryTagger(naive, true);
    assertEquals(0, tagger.size());

    tagger.add("GENES", MentionType.getType("GENE"));
    assertEquals(1, tagger.size());
    // The sentence below does not include this text as a contiguous phrase.
    tagger.add("involved in", MentionType.getType("ACTION"));
    tagger.add("axon guidance", MentionType.getType("BIOP"));
    tagger.add("C.elegans", MentionType.getType("ORGM"));

    Sentence input = new Sentence("What [GENES] are involved axon guidance in C.elegans?");
    naive.tokenize(input);
    assertEquals(13, input.getTokens().size());
    assertEquals(0, input.getMentions().size());

    tagger.tag(input);
    List<Mention> found = input.getMentions();
    assertEquals(3, found.size());

    // Expected token count of each mention, in the order the tagger reports them.
    int[] expectedTokenCounts = {1, 2, 3};
    for (int i = 0; i < expectedTokenCounts.length; i++) {
        assertEquals(expectedTokenCounts[i], found.get(i).getTokens().size());
    }
}
示例2: testNo1
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Tokenizes a sentence, builds a GENE mention by hand and checks that the
 * mention exposes the expected token and text.
 */
@Test
public void testNo1() {
    NaiveTokenizer naive = new NaiveTokenizer();
    Sentence input = new Sentence("What [GENES] are involved in the melanogenesis of human lung cancers?");
    naive.tokenize(input);

    List<Token> allTokens = input.getTokens();
    assertEquals(14, allTokens.size());
    assertEquals(0, input.getMentions().size());

    // The assertions below show that the (2, 3) arguments select exactly
    // the sentence token at index 2.
    Mention geneMention = new Mention(input, MentionType.getType("GENE"), 2, 3);
    List<Token> covered = geneMention.getTokens();
    assertEquals(1, covered.size());
    assertEquals(allTokens.get(2), covered.get(0));
    assertEquals("GENES", geneMention.getText());
}
示例3: testNo1
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Adds four dictionary entries one at a time, checking the dictionary size
 * after each, then tags a sentence containing all of them and checks the
 * token count of every resulting mention.
 */
@Test
public void testNo1() {
    NaiveTokenizer naive = new NaiveTokenizer();
    DictionaryTagger tagger = new DictionaryTagger(naive, true);
    assertEquals(0, tagger.size());

    // Each row is {dictionary text, mention type name}.
    String[][] entries = {
        {"GENES", "GENE"},
        {"melanogenesis", "BIOP"},
        {"human", "ORGM"},
        {"lung cancers", "DISE"},
    };
    for (int i = 0; i < entries.length; i++) {
        tagger.add(entries[i][0], MentionType.getType(entries[i][1]));
        assertEquals(i + 1, tagger.size());
    }

    Sentence input = new Sentence("What [GENES] are involved in the melanogenesis of human lung cancers?");
    naive.tokenize(input);
    assertEquals(14, input.getTokens().size());
    assertEquals(0, input.getMentions().size());

    tagger.tag(input);
    List<Mention> found = input.getMentions();
    assertEquals(4, found.size());

    // Expected token count of each mention, in the order the tagger reports them.
    int[] expectedTokenCounts = {1, 1, 1, 2};
    for (int i = 0; i < expectedTokenCounts.length; i++) {
        assertEquals(expectedTokenCounts[i], found.get(i).getTokens().size());
    }
}
示例4: load
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename, and instantiates any objects to be used, such as the lemmatiser and part-of-speech (pos)
 * tagger.
 * <p>
 * Keys read and their defaults when absent: {@code lemmatiserDataDirectory} (no lemmatiser is created), {@code posTaggerDataDirectory} (no POS
 * tagger is created), {@code posTagger} ({@link HeppleTagger}), {@code tokenizer} ({@link SimpleTokenizer}), {@code tagFormat} ({@code IOB}),
 * {@code useParenthesisPostProcessing} ({@code true}), {@code useNumericNormalization} ({@code true}), {@code order} ({@code 2}),
 * {@code useFeatureInduction} ({@code false}) and {@code textDirection} ({@code Forward}).
 *
 * @param filename
 *        path of the properties file to read
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *         wrapping any exception raised while reading the file or building the configured components, including the
 *         {@link IllegalArgumentException} raised for an unknown POS tagger or tokenizer class name
 */
public static BannerProperties load(String filename)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // Properties.load leaves the stream open after it returns, so close it here to avoid leaking the file handle.
        FileInputStream propertiesStream = new FileInputStream(filename);
        try {
            properties.load(propertiesStream);
        } finally {
            propertiesStream.close();
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null)
            bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null)
        {
            // The tagger implementation is selected by fully qualified class name.
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName()))
                bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
            else if (posTagger.equals(MedPostTagger.class.getName()))
                bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
            else
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
        }
        // The tokenizer implementation is likewise selected by fully qualified class name.
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName()))
            bannerProperties.tokenizer = new NaiveTokenizer();
        else if (tokenizer.equals(SimpleTokenizer.class.getName()))
            bannerProperties.tokenizer = new SimpleTokenizer();
        else if (tokenizer.equals(BaseTokenizer.class.getName()))
            bannerProperties.tokenizer = new BaseTokenizer();
        else
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        // Every failure (I/O, bad enum value, bad number, unknown class name) is surfaced unchecked with its cause preserved.
        throw new RuntimeException(e);
    }
    return bannerProperties;
}
示例5: testSimple
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Tokenizes a short string mixing letters, digits and punctuation, then
 * checks the start offset, end offset and text of each of the first
 * sixteen tokens.
 */
@Test
public void testSimple() {
    NaiveTokenizer naive = new NaiveTokenizer();
    Sentence input = new Sentence("AA 12 - AA12 AA-12 (12AA) 12.-.");
    naive.tokenize(input);
    List<Token> produced = input.getTokens();

    // Parallel tables: entry i describes the expected token at index i.
    int[] expectedStarts = {0, 3, 6, 8, 10, 13, 15, 16, 19, 20, 22, 24, 26, 28, 29, 30};
    int[] expectedEnds = {2, 5, 7, 10, 12, 15, 16, 18, 20, 22, 24, 25, 28, 29, 30, 31};
    String[] expectedTexts = {
        "AA", "12", "-", "AA", "12", "AA", "-", "12",
        "(", "12", "AA", ")", "12", ".", "-", "."
    };

    for (int i = 0; i < expectedTexts.length; i++) {
        Token current = produced.get(i);
        assertEquals(expectedStarts[i], current.getStart());
        assertEquals(expectedEnds[i], current.getEnd());
        assertEquals(expectedTexts[i], current.getText());
    }
}
示例6: load
import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename and instantiates the configured components, resolving the lemmatiser and POS tagger
 * data directories relative to a data root.
 * <p>
 * Keys read and their defaults when absent: {@code lemmatiserDataDirectory} (no lemmatiser is created), {@code posTaggerDataDirectory} (no POS
 * tagger is created), {@code posTagger} ({@link HeppleTagger}), {@code tokenizer} ({@link SimpleTokenizer}), {@code tagFormat} ({@code IOB}),
 * {@code useParenthesisPostProcessing} ({@code true}), {@code useNumericNormalization} ({@code true}), {@code order} ({@code 2}),
 * {@code useFeatureInduction} ({@code false}) and {@code textDirection} ({@code Forward}).
 *
 * @param filename
 *        path of the properties file to read
 * @param dataroot
 *        prefix concatenated as-is (no separator is inserted) in front of the {@code lemmatiserDataDirectory} and
 *        {@code posTaggerDataDirectory} values
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *         wrapping any exception raised while reading the file or building the configured components, including the
 *         {@link IllegalArgumentException} raised for an unknown POS tagger or tokenizer class name
 */
public static BannerProperties load(String filename, String dataroot)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // Properties.load leaves the stream open after it returns, so close it here to avoid leaking the file handle.
        FileInputStream propertiesStream = new FileInputStream(filename);
        try {
            properties.load(propertiesStream);
        } finally {
            propertiesStream.close();
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null)
            bannerProperties.lemmatiser = new EngLemmatiser(dataroot+lemmatiserDataDirectory, false, true);
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null)
        {
            // The tagger implementation is selected by fully qualified class name.
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName()))
                bannerProperties.posTagger = new HeppleTagger(dataroot+posTaggerDataDirectory);
            else if (posTagger.equals(MedPostTagger.class.getName()))
                bannerProperties.posTagger = new MedPostTagger(dataroot+posTaggerDataDirectory);
            else
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
        }
        // The tokenizer implementation is likewise selected by fully qualified class name.
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName()))
            bannerProperties.tokenizer = new NaiveTokenizer();
        else if (tokenizer.equals(SimpleTokenizer.class.getName()))
            bannerProperties.tokenizer = new SimpleTokenizer();
        else if (tokenizer.equals(BaseTokenizer.class.getName()))
            bannerProperties.tokenizer = new BaseTokenizer();
        else
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        // Every failure (I/O, bad enum value, bad number, unknown class name) is surfaced unchecked with its cause preserved.
        throw new RuntimeException(e);
    }
    return bannerProperties;
}