本文整理汇总了Java中com.aliasi.tokenizer.IndoEuropeanTokenizerFactory类的典型用法代码示例。如果您正苦于以下问题：Java IndoEuropeanTokenizerFactory类的具体用法？Java IndoEuropeanTokenizerFactory怎么用？Java IndoEuropeanTokenizerFactory使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
IndoEuropeanTokenizerFactory类属于com.aliasi.tokenizer包，在下文中一共展示了IndoEuropeanTokenizerFactory类的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: initialize
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
 * Sets up this component from the given UIMA context.
 *
 * <p>After delegating to the superclass, this method populates, in order: the sentence
 * {@code chunker}, the {@code hits} value, the query {@code analyzer} and {@code parser}, and
 * the {@code queryStringConstructor}. Each configurable object is obtained through
 * {@code UimaContextHelper.createObjectFromConfigParameter}, passing a parameter name, a
 * parameter name for its arguments, a concrete class and the expected type.
 *
 * @param context the UIMA context the configuration parameters are read from
 * @throws ResourceInitializationException declared by the overridden method
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    // Sentence chunker: a tokenizer factory combined with a sentence model.
    // NOTE(review): the class literals passed below are presumably the defaults used when the
    // parameter is not configured; confirm against UimaContextHelper.
    final TokenizerFactory tokenizers = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "tokenizer-factory",
            "tokenizer-factory-params",
            IndoEuropeanTokenizerFactory.class,
            TokenizerFactory.class);
    final SentenceModel sentences = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "sentence-model",
            "sentence-model-params",
            IndoEuropeanSentenceModel.class,
            SentenceModel.class);
    chunker = new SentenceChunker(tokenizers, sentences);

    // "hits" setting; 200 is passed as the third argument (presumably the fallback value).
    hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 200);

    // Query analyzer, and a query parser bound to the "text" field that uses it.
    analyzer = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "query-analyzer",
            "query-analyzer-params",
            StandardAnalyzer.class,
            Analyzer.class);
    parser = new QueryParser("text", analyzer);

    // Strategy object that builds the query string.
    queryStringConstructor = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "query-string-constructor",
            "query-string-constructor-params",
            BooleanBagOfPhraseQueryStringConstructor.class,
            QueryStringConstructor.class);
}
示例2: initialize
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
 * Sets up this component from the given UIMA context.
 *
 * <p>After delegating to the superclass, this method populates, in order: the sentence
 * {@code chunker}, the {@code hits} value, the query {@code analyzer} and {@code parser}, the
 * {@code queryStringConstructor}, the {@code parserProvider} looked up by its configured name,
 * and a new {@code StanfordLemmatizer} stored in {@code lemma}.
 *
 * @param context the UIMA context the configuration parameters are read from
 * @throws ResourceInitializationException declared by the overridden method
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    // Sentence chunker: a tokenizer factory combined with a sentence model.
    // NOTE(review): the class literals passed below are presumably the defaults used when the
    // parameter is not configured; confirm against UimaContextHelper.
    final TokenizerFactory tokenizers = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "tokenizer-factory",
            "tokenizer-factory-params",
            IndoEuropeanTokenizerFactory.class,
            TokenizerFactory.class);
    final SentenceModel sentences = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "sentence-model",
            "sentence-model-params",
            IndoEuropeanSentenceModel.class,
            SentenceModel.class);
    chunker = new SentenceChunker(tokenizers, sentences);

    // "hits" setting; 200 is passed as the third argument (presumably the fallback value).
    hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 200);

    // Query analyzer, and a query parser bound to the "text" field that uses it.
    analyzer = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "query-analyzer",
            "query-analyzer-params",
            StandardAnalyzer.class,
            Analyzer.class);
    parser = new QueryParser("text", analyzer);

    // Strategy object that builds the query string.
    queryStringConstructor = UimaContextHelper.createObjectFromConfigParameter(
            context,
            "query-string-constructor",
            "query-string-constructor-params",
            BagOfPhraseQueryStringConstructor.class,
            QueryStringConstructor.class);

    // Resolve the parser provider from the name stored under "parser-provider".
    final String providerName =
            UimaContextHelper.getConfigParameterStringValue(context, "parser-provider");
    parserProvider = ProviderCache.getProvider(providerName, ParserProvider.class);

    lemma = new StanfordLemmatizer();
}
示例3: getSignificantPhrases
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
 * Collects, for every category found in the corpus, the phrases produced by
 * {@code getCollocations} followed by those produced by {@code getFrequentTerms}.
 *
 * <p>For each category a fresh {@code TokenizedLM} is fed every sentence of that category and
 * its sequence counter is pruned with the value 3 before the phrases are extracted.
 *
 * @param corpus the review corpus handed to {@code Categorizer.getCategories}
 * @return a map from category key to its collected phrases
 */
public static Map<String, List<String>> getSignificantPhrases(
        ReviewCorpus corpus) {
    Map<String, List<String>> phrasesByCategory = new HashMap<String, List<String>>();

    for (Entry<String, List<String>> category : Categorizer
            .getCategories(corpus, Categorizer.entityExtractor,
                    Categorizer.categoryWithSentenceCollector,
                    new HashMap<String, List<String>>())
            .entrySet()) {
        // One language model per category, fed with that category's sentences.
        TokenizedLM languageModel = new TokenizedLM(
                IndoEuropeanTokenizerFactory.INSTANCE, NGRAM);
        for (String text : category.getValue()) {
            languageModel.handle(text);
        }
        languageModel.sequenceCounter().prune(3);

        // Collocations first, frequent terms after them.
        List<String> phrases = new ArrayList<String>();
        phrases.addAll(getCollocations(languageModel));
        phrases.addAll(getFrequentTerms(languageModel));
        phrasesByCategory.put(category.getKey(), phrases);
    }

    return phrasesByCategory;
}
示例4: NamedEntityChunkParser
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
 * Creates the parser and immediately trains it on the given corpus.
 *
 * <p>The {@code chunkerEstimator} is a {@code CharLmHmmChunker} built from the shared
 * {@code IndoEuropeanTokenizerFactory.INSTANCE} and an {@code HmmCharLmEstimator} configured
 * with {@code NGRAM}, {@code CHARS} and {@code INTERPOLATION}.
 *
 * @param trainerCorpus the corpus passed to {@code train}
 * @throws JAXBException declared by this constructor; presumably raised while training
 * @throws SAXException declared by this constructor; presumably raised while training
 * @throws ParserConfigurationException declared by this constructor; presumably raised while
 *         training
 * @throws IOException declared by this constructor; presumably raised while training
 */
public NamedEntityChunkParser(ReviewCorpus trainerCorpus)
        throws JAXBException, SAXException, ParserConfigurationException, IOException {
    final TokenizerFactory tokenizers = IndoEuropeanTokenizerFactory.INSTANCE;
    final HmmCharLmEstimator hmmEstimator =
            new HmmCharLmEstimator(NGRAM, CHARS, INTERPOLATION);

    chunkerEstimator = new CharLmHmmChunker(tokenizers, hmmEstimator);
    train(trainerCorpus);
}
示例5: LingPipeSentenceSplitter
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
*
*/
public LingPipeSentenceSplitter()
{
// load sentence splitter
sentenceChunker = new SentenceChunker(new IndoEuropeanTokenizerFactory(), new IndoEuropeanSentenceModel(true, true));
}
示例6: init
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory; //导入依赖的package包/类
/**
 * Initialises this resource and returns it.
 *
 * <p>The only setup performed here is storing the shared
 * {@code IndoEuropeanTokenizerFactory.INSTANCE} in {@code tf}.
 *
 * @return this resource
 * @throws ResourceInstantiationException declared by the signature; nothing in this body
 *         throws it directly
 */
public Resource init() throws ResourceInstantiationException {
// use the shared tokenizer factory instance rather than creating a new one
tf = IndoEuropeanTokenizerFactory.INSTANCE;
return this;
}