本文整理汇总了Java中opennlp.tools.tokenize.TokenizerME类的典型用法代码示例。如果您正苦于以下问题:Java TokenizerME类的具体用法?Java TokenizerME怎么用?Java TokenizerME使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
TokenizerME类属于opennlp.tools.tokenize包,在下文中一共展示了TokenizerME类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: doRun
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
@Override
public List<Word> doRun(Language language, String sentence) {
    // Tokenize the sentence, then assign each token its part-of-speech tag.
    Tokenizer tokenizer = new TokenizerME(getTokenizerModel(language));
    POSTaggerME posTagger = new POSTaggerME(getPOSModel(language));
    String[] tokens = tokenizer.tokenize(sentence);
    String[] tags = posTagger.tag(tokens);
    // Map each raw tag string onto the language-specific part-of-speech set.
    PartOfSpeechSet partOfSpeechSet = PartOfSpeechSet.getPOSSet(language);
    List<Word> taggedWords = new ArrayList<>();
    int index = 0;
    for (String token : tokens) {
        taggedWords.add(new Word(partOfSpeechSet.valueOf(tags[index]), token));
        index++;
    }
    return taggedWords;
}
示例2: OpenNLP
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Private constructor to allow only one {@link OpenNLP} for each Thread.
 *
 * <p>Loads the sentence detector, tokenizer, POS tagger, three name finders
 * (organisation, location, person) and the lemmatizer dictionary.
 *
 * @throws IllegalStateException if an error occurred from {@link LoaderNLP} or {@link PropertiesManager}
 */
private OpenNLP() {
    try {
        detector = new SentenceDetectorME(LoaderNLP.getSentenceModel());
        tokenizer = new TokenizerME(LoaderNLP.getTokenizerModel());
        tagger = new POSTaggerME(LoaderNLP.getPosModel());
        nameFinderOrg = new NameFinderME(LoaderNLP.getTokenNameFinderModelOrg());
        nameFinderLoc = new NameFinderME(LoaderNLP.getTokenNameFinderModelLoc());
        nameFinderPers = new NameFinderME(LoaderNLP.getTokenNameFinderModelPers());
        // try-with-resources: the original leaked the stream when the
        // SimpleLemmatizer constructor threw before close() was reached.
        try (InputStream inputStream =
                new FileInputStream(PROPERTIES_MANAGER.getProperty("nlp.dictionaries.path"))) {
            lemmatizer = new SimpleLemmatizer(inputStream);
        }
    } catch (IllegalArgumentException | IOException e) {
        LOGGER.error(e.getMessage());
        throw new IllegalStateException(e);
    }
}
示例3: tokenDetect
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Tokenizes a sentence with the OpenNLP model matching the instance's language.
 *
 * @param sentence the text to tokenize
 * @return the tokens, or {@code null} if the language is unsupported or the
 *         model file cannot be read (original best-effort contract preserved)
 */
public String[] tokenDetect(String sentence) {
    File modelIn = null;
    String[] tokens = null;
    File userDir = new File(System.getProperty("user.dir"));
    if (this.turNLPInstance.getLanguage().equals("en_US")) {
        modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/en/en-token.bin"));
    } else if (this.turNLPInstance.getLanguage().equals("pt_BR")) {
        modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/pt/pt-token.bin"));
    }
    if (modelIn == null) {
        // BUG FIX: unsupported languages left modelIn null and the original
        // crashed with a NullPointerException inside new TokenizerModel(null).
        return null;
    }
    try {
        TokenizerModel model = new TokenizerModel(modelIn);
        Tokenizer tokenizer = new TokenizerME(model);
        tokens = tokenizer.tokenize(sentence);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return tokens;
}
示例4: initialize
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Initializes the current instance with the given context.
 *
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    final TokenizerModel tokenizerModel;
    try {
        // The tokenizer model is shared via a UIMA external resource.
        TokenizerModelResource resource =
            (TokenizerModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER);
        tokenizerModel = resource.getModel();
    } catch (ResourceAccessException e) {
        // Surface resource lookup failures as initialization failures.
        throw new ResourceInitializationException(e);
    }
    tokenizer = new TokenizerME(tokenizerModel);
}
示例5: init
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Initialization method. Builds the OpenNLP sentence detector and tokenizer
 * and loads both stop-word lists (one word per line).
 *
 * @param sent   model stream for the sentence detector; closed here
 * @param token  model stream for the tokenizer; closed here
 * @param stop   stop-word list stream; closed here
 * @param exstop extended stop-word list stream; closed here
 * @throws IOException if any of the streams cannot be read
 */
private void init(InputStream sent, InputStream token, InputStream stop, InputStream exstop) throws IOException {
    // creates a new SentenceDetector and Tokenizer
    SentenceModel sentModel = new SentenceModel(sent);
    sent.close();
    sdetector = new SentenceDetectorME(sentModel);
    TokenizerModel tokenModel = new TokenizerModel(token);
    token.close();
    tokenizer = new TokenizerME(tokenModel);
    // try-with-resources: the original leaked the readers when readLine() threw.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(stop))) {
        String line;
        while ((line = br.readLine()) != null) {
            stopwords.add(line);
        }
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(exstop))) {
        String line;
        while ((line = br.readLine()) != null) {
            extendedStopwords.add(line);
        }
    }
}
示例6: doInitialize
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
// Loads the four English OpenNLP models from the classpath, then wraps each in
// its maximum-entropy component. Any failure aborts initialization.
try {
// Load raw model files; loadModel wraps the stream handling.
tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
} catch (BaleenException be) {
getMonitor().error("Unable to load OpenNLP Language Models", be);
throw new ResourceInitializationException(be);
}
try {
// Build the processing components from the loaded models (sentence detector,
// tokenizer, POS tagger, chunker).
sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
posTagger = new POSTaggerME((POSModel) posModel.getModel());
phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
} catch (Exception e) {
getMonitor().error("Unable to create OpenNLP taggers", e);
throw new ResourceInitializationException(e);
}
}
示例7: scoreStructure
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Scores how well the parse structure of the question matches the passage,
 * provided the candidate answer occurs in the passage.
 *
 * @param ca      candidate answer string
 * @param q       the question text
 * @param passage the passage text
 * @param verbose unused verbosity flag (kept for interface compatibility)
 * @return the accumulated child-match score, 0 if the passage lacks the answer
 * @throws InvalidFormatException if a model file is malformed
 * @throws IOException            if a model file cannot be read
 */
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException {
    // try-with-resources: the original never closed these three model streams.
    try (FileInputStream posIn = new FileInputStream(new File("en-pos-model.bin"));
         FileInputStream tokenIn = new FileInputStream(new File("en-token.bin"));
         FileInputStream parserIn = new FileInputStream(new File("en-parser.bin"))) {
        // NOTE(review): tagger and tokenizer are built but never used; kept so the
        // model files are still required and the external contract is unchanged.
        POSTaggerME parserModel = new POSTaggerME(new POSModel(posIn));
        Tokenizer tokenizer = new TokenizerME(new TokenizerModel(tokenIn));
        Parser parser = ParserFactory.create(new ParserModel(parserIn));
        double score = 0;
        Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
        // BUG FIX: the original parsed the question twice ("parseLine(q, ...)");
        // the passage was never parsed, so the comparison was question vs. itself.
        Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);
        if (passage.contains(ca)) {
            for (int i = 0; i < questionParse.length; i++) {
                score += matchChildren(questionParse[i], passageParse[i]);
            }
        }
        return score;
    }
}
示例8: startStage
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
@Override
public void startStage(StageConfiguration config) {
    // Map the configurable field names, keeping the current values as defaults.
    textField = config.getProperty("textField", textField);
    peopleField = config.getProperty("peopleField", peopleField);
    posTextField = config.getProperty("posTextField", posTextField);
    // try-with-resources: the original never closed any of the four model streams.
    try (FileInputStream sentIn = new FileInputStream(sentenceModelFile);
         FileInputStream tokenIn = new FileInputStream(tokenModelFile);
         FileInputStream nameIn = new FileInputStream(personModelFile);
         FileInputStream posIn = new FileInputStream(posModelFile)) {
        // Sentence finder
        sentenceDetector = new SentenceDetectorME(new SentenceModel(sentIn));
        // tokenizer
        tokenizer = new TokenizerME(new TokenizerModel(tokenIn));
        // person name finder
        nameFinder = new NameFinderME(new TokenNameFinderModel(nameIn));
        // load the part of speech tagger.
        posTagger = new POSTaggerME(new POSModel(posIn));
    } catch (IOException e) {
        // BUG FIX: a load failure was logged at info level; it is an error.
        log.error("Error loading up OpenNLP Models. {}", e.getLocalizedMessage());
        e.printStackTrace();
    }
}
示例9: exec
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Tokenizes the single string field of the input tuple and returns one
 * output tuple per token, bagged.
 *
 * @param input a one-field tuple whose field is the text to tokenize
 * @return a bag of single-field token tuples, or {@code null} for empty input
 * @throws IOException if the tuple does not have exactly one field or the
 *                     tokenizer model cannot be loaded
 */
public DataBag exec(Tuple input) throws IOException
{
    if (input.size() != 1) {
        throw new IOException();
    }
    String inputString = input.get(0).toString();
    // BUG FIX: the original used `inputString == ""`, which compares object
    // identity and is effectively never true; use isEmpty() instead.
    if (inputString == null || inputString.isEmpty()) {
        return null;
    }
    DataBag outBag = bf.newDefaultBag();
    if (this.tokenizer == null) {
        // Lazily load the tokenizer model from the distributed cache.
        String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
        // try-with-resources: the original leaked both the file and buffer streams.
        try (InputStream buffer = new BufferedInputStream(new FileInputStream(loadFile))) {
            TokenizerModel model = new TokenizerModel(buffer);
            this.tokenizer = new TokenizerME(model);
        }
    }
    String[] tokens = this.tokenizer.tokenize(inputString);
    for (String token : tokens) {
        Tuple outTuple = tf.newTuple(token);
        outBag.add(outTuple);
    }
    return outBag;
}
示例10: KeyPhraseChunkExtractor
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Builds the POS tagger, chunker, and tokenizer from bundled classpath models.
 *
 * @throws Exception   preserved from the original signature for compatibility
 * @throws IOException if a model resource cannot be read
 */
public KeyPhraseChunkExtractor() throws Exception, IOException {
    // try-with-resources: the original reused one stream variable and never
    // closed any of the three model streams.
    try (InputStream posIn = getClass().getResourceAsStream(
            "/nlptools/data/en-pos-maxent.bin")) {
        posModel = new POSModel(posIn);
    }
    tagger = new POSTaggerME(posModel);
    try (InputStream chunkIn = getClass().getResourceAsStream(
            "/nlptools/data/en-chunker.bin")) {
        chunkModel = new ChunkerModel(chunkIn);
    }
    chunker = new ChunkerME(chunkModel);
    try (InputStream tokenIn = getClass().getResourceAsStream("/nlptools/data/en-token.bin")) {
        nlTokenizerModel = new TokenizerModel(tokenIn);
    }
    nlTokenizer = new TokenizerME(nlTokenizerModel);
}
示例11: inform
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
@Override
public void inform(ResourceLoader loader) throws IOException {
    // Optional sentence detector: only built when a model file is configured.
    if (sentenceModelFile != null) {
        sentenceOp = new SentenceDetectorME(new SentenceModel(
            loader.openResource(sentenceModelFile)));
    }
    // The tokenizer model is mandatory.
    if (tokenizerModelFile == null) {
        // BUG FIX: the message misspelled the parameter name as 'tokenizerModle'.
        throw new IOException("Parameter 'tokenizerModelFile' is required, but is invalid:" + tokenizerModelFile);
    }
    tokenizerOp = new TokenizerME(new TokenizerModel(
        loader.openResource(tokenizerModelFile)
    ));
    // Optional paragraph chunker, instantiated reflectively from its class name.
    if (parChunkingClass != null) {
        try {
            // Class<?> instead of the raw type; getDeclaredConstructor() replaces
            // the deprecated Class.newInstance().
            Class<?> c = Class.forName(parChunkingClass);
            Object o = c.getDeclaredConstructor().newInstance();
            paragraphChunker = (ParagraphChunker) o;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}
示例12: initialize
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Loads the Portuguese POS-tagger, tokenizer, and sentence-detector models
 * (the variants trained with the VPP tag) and builds the shared components.
 *
 * @throws IOException if any model file cannot be read
 */
public static void initialize() throws IOException {
    /* normal model */
    /*
    model = new POSModelLoader().load(new File(RESOURCES + "pt.postagger.model"));
    tModel = new TokenizerModel(new FileInputStream(RESOURCES + "pt.tokenizer.model"));
    sModel = new SentenceModel(new FileInputStream(RESOURCES + "pt.sentdetect.model"));
    */
    /* with VPP tag */
    model = new POSModelLoader().load(new File(RESOURCES + "pt.postaggerVerbPP.model"));
    // try-with-resources: the original never closed these two model streams.
    try (FileInputStream tokenIn = new FileInputStream(RESOURCES + "pt.tokenizerVerbPP.model")) {
        tModel = new TokenizerModel(tokenIn);
    }
    try (FileInputStream sentIn = new FileInputStream(RESOURCES + "pt.sentDetectVerbPP.model")) {
        sModel = new SentenceModel(sentIn);
    }
    tagger = new POSTaggerME(model);
    token = new TokenizerME(tModel);
    sent = new SentenceDetectorME(sModel);
}
示例13: segmentWords
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Splits the text into words with the OpenNLP tokenizer, dropping punctuation.
 *
 * @param text the text to segment
 * @return the non-punctuation tokens; empty on model-load failure
 *         (original best-effort contract preserved)
 */
public List<String> segmentWords(String text) {
    List<String> wordsList = new ArrayList<String>();
    // try-with-resources: the original only closed the stream on the success
    // path, leaking it whenever TokenizerModel threw; also drops the stray ';;'.
    try (InputStream modelIn = getClass().getResourceAsStream(wordBin)) {
        TokenizerModel model = new TokenizerModel(modelIn);
        TokenizerME tokenizer = new TokenizerME(model);
        String[] words = tokenizer.tokenize(text);
        for (String word : words) {
            if (!punctuation.contains(word)) {
                wordsList.add(word);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return wordsList;
}
示例14: initialize
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Initializes the current instance with the given context.
 *
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context)
    throws ResourceInitializationException {
    super.initialize(context);
    TokenizerModel loadedModel;
    try {
        // Fetch the shared tokenizer model from the UIMA resource manager.
        loadedModel = ((TokenizerModelResource) context
            .getResourceObject(UimaUtil.MODEL_PARAMETER)).getModel();
    } catch (ResourceAccessException e) {
        // Translate resource-access problems into initialization failures.
        throw new ResourceInitializationException(e);
    }
    tokenizer = new TokenizerME(loadedModel);
}
示例15: initialize
import opennlp.tools.tokenize.TokenizerME; //导入依赖的package包/类
/**
 * Initializes the current instance with the given context.
 *
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    try {
        // Obtain the externally managed tokenizer model resource and build the
        // maximum-entropy tokenizer from it.
        TokenizerModelResource modelResource = (TokenizerModelResource)
            context.getResourceObject(UimaUtil.MODEL_PARAMETER);
        tokenizer = new TokenizerME(modelResource.getModel());
    } catch (ResourceAccessException e) {
        // Resource lookup failed: report as an initialization error.
        throw new ResourceInitializationException(e);
    }
}