当前位置: 首页>>代码示例>>Java>>正文


Java NaiveTokenizer类代码示例

本文整理汇总了Java中banner.tokenization.NaiveTokenizer的典型用法代码示例。如果您正苦于以下问题：Java NaiveTokenizer类的具体用法？Java NaiveTokenizer怎么用？Java NaiveTokenizer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


NaiveTokenizer类属于banner.tokenization包，在下文中一共展示了NaiveTokenizer类的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testNo2

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Tags a sentence against a dictionary in which one entry ("involved in")
 * does not occur contiguously in the sentence, and checks that only the
 * other three entries produce mentions.
 */
@Test
public void testNo2() {
	NaiveTokenizer naive = new NaiveTokenizer();
	DictionaryTagger tagger = new DictionaryTagger(naive, true);

	// The dictionary is expected to start empty and report one entry after the first add.
	assertEquals(0, tagger.size());
	tagger.add("GENES", MentionType.getType("GENE"));
	assertEquals(1, tagger.size());

	// "involved in" is deliberately absent from the sentence below
	// (there the two words are separated by "axon guidance").
	tagger.add("involved in", MentionType.getType("ACTION"));
	tagger.add("axon guidance", MentionType.getType("BIOP"));
	tagger.add("C.elegans", MentionType.getType("ORGM"));

	Sentence question = new Sentence(
			"What [GENES] are involved axon guidance in C.elegans?");
	naive.tokenize(question);

	// Before tagging: 13 tokens expected and no mentions yet.
	assertEquals(13, question.getTokens().size());
	assertEquals(0, question.getMentions().size());

	tagger.tag(question);
	List<Mention> found = question.getMentions();
	assertEquals(3, found.size());

	// Expected number of tokens covered by each mention, in list order.
	int[] expectedTokenCounts = { 1, 2, 3 };
	for (int i = 0; i < expectedTokenCounts.length; i++) {
		assertEquals(expectedTokenCounts[i], found.get(i).getTokens().size());
	}
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:27,代码来源:DictionaryTaggerTest.java

示例2: testNo1

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Builds a {@link Mention} over a tokenized sentence and checks that it
 * exposes the expected token and text.
 */
@Test
public void testNo1() {
	NaiveTokenizer naive = new NaiveTokenizer();
	Sentence question = new Sentence(
			"What [GENES] are involved in the melanogenesis of human lung cancers?");
	naive.tokenize(question);

	List<Token> allTokens = question.getTokens();
	assertEquals(14, allTokens.size());
	assertEquals(0, question.getMentions().size());

	// A mention created with bounds 2 and 3 is expected to cover exactly
	// the sentence token at index 2, whose text is "GENES".
	Mention geneMention = new Mention(question, MentionType.getType("GENE"), 2, 3);
	List<Token> covered = geneMention.getTokens();
	assertEquals(1, covered.size());
	assertEquals(allTokens.get(2), covered.get(0));
	assertEquals("GENES", geneMention.getText());
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:16,代码来源:MentionTest.java

示例3: testNo1

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Fills a dictionary with four entries, checking its size after each
 * insertion, then tags a sentence containing all four and checks the
 * number of tokens covered by each resulting mention.
 */
@Test
public void testNo1() {
	NaiveTokenizer naive = new NaiveTokenizer();
	DictionaryTagger tagger = new DictionaryTagger(naive, true);

	// Each row is { dictionary text, mention type name }.
	String[][] entries = {
			{ "GENES", "GENE" },
			{ "melanogenesis", "BIOP" },
			{ "human", "ORGM" },
			{ "lung cancers", "DISE" } };

	assertEquals(0, tagger.size());
	for (int i = 0; i < entries.length; i++) {
		tagger.add(entries[i][0], MentionType.getType(entries[i][1]));
		// The size is expected to grow by exactly one per added entry.
		assertEquals(i + 1, tagger.size());
	}

	Sentence question = new Sentence(
			"What [GENES] are involved in the melanogenesis of human lung cancers?");
	naive.tokenize(question);
	assertEquals(14, question.getTokens().size());
	assertEquals(0, question.getMentions().size());

	tagger.tag(question);
	List<Mention> found = question.getMentions();
	assertEquals(4, found.size());

	// Expected number of tokens covered by each mention, in list order.
	int[] expectedTokenCounts = { 1, 1, 1, 2 };
	for (int i = 0; i < expectedTokenCounts.length; i++) {
		assertEquals(expectedTokenCounts[i], found.get(i).getTokens().size());
	}
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:28,代码来源:DictionaryTaggerTest.java

示例4: load

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename, and instantiates any objects to be used, such as the
 * lemmatiser and part-of-speech (pos) tagger.
 * <p>
 * Keys read, with the default applied when a key is absent:
 * <ul>
 * <li>{@code lemmatiserDataDirectory} - no default; the lemmatiser is only created when present</li>
 * <li>{@code posTaggerDataDirectory} - no default; the POS tagger is only created when present</li>
 * <li>{@code posTagger} - class name of {@link HeppleTagger}</li>
 * <li>{@code tokenizer} - class name of {@link SimpleTokenizer}</li>
 * <li>{@code tagFormat} - {@code IOB}</li>
 * <li>{@code useParenthesisPostProcessing} - {@code true}</li>
 * <li>{@code useNumericNormalization} - {@code true}</li>
 * <li>{@code order} - {@code 2}</li>
 * <li>{@code useFeatureInduction} - {@code false}</li>
 * <li>{@code textDirection} - {@code Forward}</li>
 * </ul>
 *
 * @param filename
 *            path of the properties file to read
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *             wrapping any exception raised while reading the file or interpreting its values, including the
 *             {@link IllegalArgumentException} raised for an unknown POS tagger or tokenizer class name
 */
public static BannerProperties load(String filename)
{
	Properties properties = new Properties();
	BannerProperties bannerProperties = new BannerProperties();
	try {
		// Properties.load leaves the stream open after it returns, so it is
		// closed here to avoid leaking a file handle on every call.
		try (FileInputStream in = new FileInputStream(filename)) {
			properties.load(in);
		}

		String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
		if (lemmatiserDataDirectory != null)
			bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);

		String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
		if (posTaggerDataDirectory != null)
		{
			// The tagger implementation is selected by fully qualified class name.
			String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
			if (posTagger.equals(HeppleTagger.class.getName()))
				bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
			else if (posTagger.equals(MedPostTagger.class.getName()))
				bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
			else
				throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
		}

		// The tokenizer implementation is likewise selected by fully qualified class name.
		String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
		if (tokenizer.equals(NaiveTokenizer.class.getName()))
			bannerProperties.tokenizer = new NaiveTokenizer();
		else if (tokenizer.equals(SimpleTokenizer.class.getName()))
			bannerProperties.tokenizer = new SimpleTokenizer();
		else if (tokenizer.equals(BaseTokenizer.class.getName()))
			bannerProperties.tokenizer = new BaseTokenizer();
		else
			throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);

		bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
		if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
			bannerProperties.postProcessor = new ParenthesisPostProcessor();
		bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
		bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
		bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
		bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
	} catch (Exception e) {
		// Callers see a single unchecked exception type; the cause is preserved.
		throw new RuntimeException(e);
	}
	return bannerProperties;
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:50,代码来源:BannerProperties.java

示例5: testSimple

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Tokenizes a short string mixing letters, digits, and punctuation, and
 * checks the start offset, end offset, and text of the first 16 tokens.
 */
@Test
public void testSimple() {
	NaiveTokenizer naive = new NaiveTokenizer();
	Sentence input = new Sentence("AA 12 - AA12 AA-12 (12AA) 12.-.");
	naive.tokenize(input);
	List<Token> tokens = input.getTokens();

	// Expected values per token index; the three arrays are parallel.
	int[] expectedStarts = { 0, 3, 6, 8, 10, 13, 15, 16, 19, 20, 22, 24, 26, 28, 29, 30 };
	int[] expectedEnds = { 2, 5, 7, 10, 12, 15, 16, 18, 20, 22, 24, 25, 28, 29, 30, 31 };
	String[] expectedTexts = { "AA", "12", "-", "AA", "12", "AA", "-", "12",
			"(", "12", "AA", ")", "12", ".", "-", "." };

	for (int i = 0; i < expectedTexts.length; i++) {
		// Same check order per token as before: start, end, then text.
		assertEquals(expectedStarts[i], tokens.get(i).getStart());
		assertEquals(expectedEnds[i], tokens.get(i).getEnd());
		assertEquals(expectedTexts[i], tokens.get(i).getText());
	}
}
 
开发者ID:leebird,项目名称:legonlp,代码行数:72,代码来源:NaiveTokenizerTest.java

示例6: load

import banner.tokenization.NaiveTokenizer; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename and instantiates the configured lemmatiser,
 * part-of-speech tagger, tokenizer and post-processor, resolving data directories against a root prefix.
 * <p>
 * Keys read, with the default applied when a key is absent:
 * <ul>
 * <li>{@code lemmatiserDataDirectory} - no default; the lemmatiser is only created when present</li>
 * <li>{@code posTaggerDataDirectory} - no default; the POS tagger is only created when present</li>
 * <li>{@code posTagger} - class name of {@link HeppleTagger}</li>
 * <li>{@code tokenizer} - class name of {@link SimpleTokenizer}</li>
 * <li>{@code tagFormat} - {@code IOB}</li>
 * <li>{@code useParenthesisPostProcessing} - {@code true}</li>
 * <li>{@code useNumericNormalization} - {@code true}</li>
 * <li>{@code order} - {@code 2}</li>
 * <li>{@code useFeatureInduction} - {@code false}</li>
 * <li>{@code textDirection} - {@code Forward}</li>
 * </ul>
 *
 * @param filename
 *            path of the properties file to read
 * @param dataroot
 *            prefix prepended by plain string concatenation (no separator is inserted) to the
 *            {@code lemmatiserDataDirectory} and {@code posTaggerDataDirectory} values
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *             wrapping any exception raised while reading the file or interpreting its values, including the
 *             {@link IllegalArgumentException} raised for an unknown POS tagger or tokenizer class name
 */
public static BannerProperties load(String filename, String dataroot)
{
	Properties properties = new Properties();
	BannerProperties bannerProperties = new BannerProperties();
	try {
		// Properties.load leaves the stream open after it returns, so it is
		// closed here to avoid leaking a file handle on every call.
		try (FileInputStream in = new FileInputStream(filename)) {
			properties.load(in);
		}

		String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
		if (lemmatiserDataDirectory != null)
			bannerProperties.lemmatiser = new EngLemmatiser(dataroot+lemmatiserDataDirectory, false, true);

		String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
		if (posTaggerDataDirectory != null)
		{
			// The tagger implementation is selected by fully qualified class name.
			String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
			if (posTagger.equals(HeppleTagger.class.getName()))
				bannerProperties.posTagger = new HeppleTagger(dataroot+posTaggerDataDirectory);
			else if (posTagger.equals(MedPostTagger.class.getName()))
				bannerProperties.posTagger = new MedPostTagger(dataroot+posTaggerDataDirectory);
			else
				throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
		}

		// The tokenizer implementation is likewise selected by fully qualified class name.
		String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
		if (tokenizer.equals(NaiveTokenizer.class.getName()))
			bannerProperties.tokenizer = new NaiveTokenizer();
		else if (tokenizer.equals(SimpleTokenizer.class.getName()))
			bannerProperties.tokenizer = new SimpleTokenizer();
		else if (tokenizer.equals(BaseTokenizer.class.getName()))
			bannerProperties.tokenizer = new BaseTokenizer();
		else
			throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);

		bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
		if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
			bannerProperties.postProcessor = new ParenthesisPostProcessor();
		bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
		bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
		bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
		bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
	} catch (Exception e) {
		// Callers see a single unchecked exception type; the cause is preserved.
		throw new RuntimeException(e);
	}
	return bannerProperties;
}
 
开发者ID:BlueBrain,项目名称:bluima,代码行数:43,代码来源:BannerProperties.java


注:本文中的banner.tokenization.NaiveTokenizer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。