

Java TokenizerME Class Code Examples

This article collects typical usage examples of the Java class opennlp.tools.tokenize.TokenizerME. If you are wondering what the TokenizerME class is for, how to use it, or what real-world code that uses it looks like, the curated examples below may help.


The TokenizerME class belongs to the opennlp.tools.tokenize package. The sections below show 15 code examples of the TokenizerME class, sorted by popularity by default.
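
Before the project-specific examples, here is a minimal, self-contained sketch of the basic workflow they all build on: load a TokenizerModel from a pre-trained model file, wrap it in a TokenizerME, and call tokenize. The model path "en-token.bin" is only an illustrative assumption; substitute whichever OpenNLP token model file you actually have.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

public class TokenizerMEDemo {
    public static void main(String[] args) throws IOException {
        // "en-token.bin" is an assumed path to a pre-trained OpenNLP English token model.
        try (InputStream modelIn = new FileInputStream("en-token.bin")) {
            TokenizerModel model = new TokenizerModel(modelIn);
            Tokenizer tokenizer = new TokenizerME(model);
            String[] tokens = tokenizer.tokenize("OpenNLP splits this sentence into tokens.");
            for (String token : tokens) {
                System.out.println(token);
            }
        }
    }
}

Most of the examples that follow differ only in where the model bytes come from (file system, classpath resource, or UIMA resource) and in which other OpenNLP components (sentence detector, POS tagger, chunker, name finder) are set up alongside the tokenizer.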

Example 1: doRun

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
@Override
public List<Word> doRun(Language language, String sentence) {
    Tokenizer tokenizer = new TokenizerME(getTokenizerModel(language));
    POSTaggerME tagger = new POSTaggerME(getPOSModel(language));
    String[] tokens = tokenizer.tokenize(sentence);
    String[] tags = tagger.tag(tokens);

    PartOfSpeechSet posSet = PartOfSpeechSet.getPOSSet(language);

    List<Word> words = new ArrayList<>();
    for (int i = 0; i < tokens.length; i++) {
        words.add(new Word(posSet.valueOf(tags[i]), tokens[i]));
    }

    return words;
}
 
Developer ID: Lambda-3, Project: Stargraph, Lines of code: 17, Source: OpenNLPAnnotator.java

Example 2: OpenNLP

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
/**
 * Private constructor to allow only one {@link OpenNLP} for each Thread
 *
 * @throws IllegalStateException if an error occurred from {@link LoaderNLP} or {@link PropertiesManager}
 */
private OpenNLP() {
    try {
        detector = new SentenceDetectorME(LoaderNLP.getSentenceModel());
        tokenizer = new TokenizerME(LoaderNLP.getTokenizerModel());
        tagger = new POSTaggerME(LoaderNLP.getPosModel());
        nameFinderOrg = new NameFinderME(LoaderNLP.getTokenNameFinderModelOrg());
        nameFinderLoc = new NameFinderME(LoaderNLP.getTokenNameFinderModelLoc());
        nameFinderPers = new NameFinderME(LoaderNLP.getTokenNameFinderModelPers());
        InputStream inputStream = new FileInputStream(PROPERTIES_MANAGER.getProperty("nlp.dictionaries.path"));
        lemmatizer = new SimpleLemmatizer(inputStream);
        inputStream.close();
    } catch (IllegalArgumentException | IOException e) {
        LOGGER.error(e.getMessage());
        throw new IllegalStateException(e);
    }
}
 
Developer ID: IKB4Stream, Project: IKB4Stream, Lines of code: 22, Source: OpenNLP.java

Example 3: tokenDetect

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public String[] tokenDetect(String sentence) {
	File modelIn = null;
	String tokens[] = null;
	try {
		File userDir = new File(System.getProperty("user.dir"));
		if (this.turNLPInstance.getLanguage().equals("en_US")) {
			modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/en/en-token.bin"));
		} else if (this.turNLPInstance.getLanguage().equals("pt_BR")) {
			modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/pt/pt-token.bin"));
		}
		TokenizerModel model = new TokenizerModel(modelIn);
		Tokenizer tokenizer = new TokenizerME(model);
		tokens = tokenizer.tokenize(sentence);
	} catch (IOException e) {
		e.printStackTrace();
	}
	return tokens;
}
 
Developer ID: openviglet, Project: turing, Lines of code: 19, Source: TurOpenNLPConnector.java

Example 4: initialize

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
/**
 * Initializes the current instance with the given context.
 * 
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {

    super.initialize(context);

    TokenizerModel model;

    try {
        TokenizerModelResource modelResource =
                        (TokenizerModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER);

        model = modelResource.getModel();
    } catch (ResourceAccessException e) {
        throw new ResourceInitializationException(e);
    }

    tokenizer = new TokenizerME(model);
}
 
Developer ID: deeplearning4j, Project: DataVec, Lines of code: 23, Source: ConcurrentTokenizer.java

Example 5: init

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
/**
 * Initialization method. Loads the OpenNLP sentence and tokenizer models and reads the stop-word lists.
 * @param sent input stream for the sentence detector model
 * @param token input stream for the tokenizer model
 * @param stop input stream of stop words, one per line
 * @param exstop input stream of extended stop words, one per line
 */
private void init(InputStream sent, InputStream token, InputStream stop, InputStream exstop) throws IOException {
    // creates a new SentenceDetector and Tokenizer
    SentenceModel sentModel = new SentenceModel(sent);
    sent.close();
    sdetector = new SentenceDetectorME(sentModel);
    TokenizerModel tokenModel = new TokenizerModel(token);
    token.close();
    tokenizer = new TokenizerME(tokenModel);
    BufferedReader br = new BufferedReader(new InputStreamReader(stop));
    String line;
    while ((line = br.readLine()) != null) {
        stopwords.add(line);
    }
    br.close();
    br = new BufferedReader(new InputStreamReader(exstop));
    while ((line = br.readLine()) != null) {
        extendedStopwords.add(line);
    }
    br.close();
}
 
Developer ID: J0Nreynolds, Project: Articleate, Lines of code: 26, Source: TextRank.java

Example 6: doInitialize

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
	try {
		tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
		sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
		posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
		chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
	} catch (BaleenException be) {
		getMonitor().error("Unable to load OpenNLP Language Models", be);
		throw new ResourceInitializationException(be);
	}

	try {
		sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
		wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
		posTagger = new POSTaggerME((POSModel) posModel.getModel());
		phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
	} catch (Exception e) {
		getMonitor().error("Unable to create OpenNLP taggers", e);
		throw new ResourceInitializationException(e);
	}
}
 
Developer ID: dstl, Project: baleen, Lines of code: 23, Source: OpenNLP.java

Example 7: scoreStructure

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public double scoreStructure(String ca, String q, String passage, boolean verbose) throws InvalidFormatException, IOException{
	POSTaggerME parserModel = new POSTaggerME(new POSModel(new FileInputStream(new File("en-pos-model.bin"))));
	Tokenizer tokenizer = new TokenizerME(new TokenizerModel(new FileInputStream(new File("en-token.bin"))));
	Parser parser = ParserFactory.create(new ParserModel(new FileInputStream(new File("en-parser.bin"))));
	double score = 0;
	
	Parse[] questionParse = ParserTool.parseLine(q, parser, 1);
	Parse[] passageParse = ParserTool.parseLine(passage, parser, 1);
	
	if (passage.contains(ca)) {
		for (int i =0; i < questionParse.length; i++) {
			score += matchChildren(questionParse[i],passageParse[i]);
		}
	}
	
	return score;
}
 
Developer ID: SeanTater, Project: uncc2014watsonsim, Lines of code: 18, Source: JM_Scorer.java

Example 8: startStage

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
@Override
public void startStage(StageConfiguration config) {

  // parse the config to map the params properly
  textField = config.getProperty("textField", textField);
  peopleField = config.getProperty("peopleField", peopleField);
  posTextField = config.getProperty("posTextField", posTextField);

  try {
    // Sentence finder
    SentenceModel sentModel = new SentenceModel(new FileInputStream(sentenceModelFile));
    sentenceDetector = new SentenceDetectorME(sentModel);
    // tokenizer
    TokenizerModel tokenModel = new TokenizerModel(new FileInputStream(tokenModelFile));
    tokenizer = new TokenizerME(tokenModel);
    // person name finder
    TokenNameFinderModel nameModel = new TokenNameFinderModel(new FileInputStream(personModelFile));
    nameFinder = new NameFinderME(nameModel);
    // load the part of speech tagger.
    posTagger = new POSTaggerME(new POSModel(new FileInputStream(posModelFile)));
  } catch (IOException e) {
    log.info("Error loading up OpenNLP Models. {}", e.getLocalizedMessage());
    e.printStackTrace();
  }
}
 
Developer ID: MyRobotLab, Project: myrobotlab, Lines of code: 26, Source: NounPhraseExtractor.java

Example 9: exec

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public DataBag exec(Tuple input) throws IOException
{
    if(input.size() != 1) {
        throw new IOException();
    }

    String inputString = input.get(0).toString();
    if(inputString == null || inputString.isEmpty()) {
        return null;
    }
    DataBag outBag = bf.newDefaultBag();
    if(this.tokenizer == null) {
        String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
        InputStream file = new FileInputStream(loadFile);
        InputStream buffer = new BufferedInputStream(file);
        TokenizerModel model = new TokenizerModel(buffer);
        this.tokenizer = new TokenizerME(model);
    }
    String tokens[] = this.tokenizer.tokenize(inputString);
    for(String token : tokens) {
        Tuple outTuple = tf.newTuple(token);
        outBag.add(outTuple);
    }
    return outBag;
}
 
Developer ID: apache, Project: incubator-datafu, Lines of code: 26, Source: TokenizeME.java

Example 10: KeyPhraseChunkExtractor

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public KeyPhraseChunkExtractor() throws Exception, IOException {

		InputStream modelIn = getClass().getResourceAsStream(
				"/nlptools/data/en-pos-maxent.bin");
		posModel = new POSModel(modelIn);
		tagger = new POSTaggerME(posModel);

		modelIn = getClass().getResourceAsStream(
				"/nlptools/data/en-chunker.bin");
		chunkModel = new ChunkerModel(modelIn);
		chunker = new ChunkerME(chunkModel);

		modelIn = getClass().getResourceAsStream("/nlptools/data/en-token.bin");
		nlTokenizerModel = new TokenizerModel(modelIn);
		nlTokenizer = new TokenizerME(nlTokenizerModel);
	}
 
Developer ID: mast-group, Project: nlptools, Lines of code: 17, Source: KeyPhraseChunkExtractor.java

Example 11: inform

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
    if(sentenceModelFile!=null) {
        sentenceOp = new SentenceDetectorME(new SentenceModel(
                loader.openResource(sentenceModelFile)));
    }

    if(tokenizerModelFile==null)
        throw new IOException("Parameter 'tokenizerModel' is required, but is invalid: " + tokenizerModelFile);
    tokenizerOp = new TokenizerME(new TokenizerModel(
            loader.openResource(tokenizerModelFile)
    ));

    if(parChunkingClass!=null) {
        try {
            Class c = Class.forName(parChunkingClass);
            Object o = c.newInstance();
            paragraphChunker = (ParagraphChunker) o;
        }catch (Exception e){
            throw new IOException(e);
        }
    }

}
 
Developer ID: ziqizhang, Project: jate, Lines of code: 25, Source: OpenNLPTokenizerFactory.java

Example 12: initialize

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public static void initialize() throws IOException {
	
	/* normal model */
	/*
	model = new POSModelLoader().load(new File(RESOURCES + "pt.postagger.model"));
       tModel = new TokenizerModel(new FileInputStream(RESOURCES + "pt.tokenizer.model")); 
       sModel = new SentenceModel(new FileInputStream(RESOURCES + "pt.sentdetect.model"));
       */
	
       /* with VPP tag */
       model = new POSModelLoader().load(new File(RESOURCES + "pt.postaggerVerbPP.model"));
       tModel = new TokenizerModel(new FileInputStream(RESOURCES + "pt.tokenizerVerbPP.model")); 
       sModel = new SentenceModel(new FileInputStream(RESOURCES + "pt.sentDetectVerbPP.model"));
               
       tagger = new POSTaggerME(model); 
       token = new TokenizerME(tModel);
       sent = new SentenceDetectorME(sModel);
}
 
Developer ID: davidsbatista, Project: MuSICo, Lines of code: 19, Source: PortuguesePOSTagger.java

Example 13: segmentWords

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
public List<String> segmentWords(String text) {
	
	List<String> wordsList = new ArrayList<String>();
    
    try {
    	InputStream modelIn = getClass().getResourceAsStream(wordBin);
		TokenizerModel model = new TokenizerModel(modelIn);
		TokenizerME tokenizer = new TokenizerME(model);
		String[] words = tokenizer.tokenize(text);
		for(String word : words)
			if (!punctuation.contains(word))
				wordsList.add(word);
		
		modelIn.close();
	} catch (IOException e) {
		e.printStackTrace();
	}
    
    return wordsList;
}
 
Developer ID: kariminf, Project: langpi, Lines of code: 21, Source: OpennlpSegmenter.java

Example 14: initialize

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
/**
 * Initializes the current instance with the given context.
 * 
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context)
    throws ResourceInitializationException {

  super.initialize(context);

  TokenizerModel model;

  try {
    TokenizerModelResource modelResource = (TokenizerModelResource) context
        .getResourceObject(UimaUtil.MODEL_PARAMETER);

    model = modelResource.getModel();
  } catch (ResourceAccessException e) {
    throw new ResourceInitializationException(e);
  }

  tokenizer = new TokenizerME(model);
}
 
Developer ID: jpatanooga, Project: Canova, Lines of code: 24, Source: ConcurrentTokenizer.java

Example 15: initialize

import opennlp.tools.tokenize.TokenizerME; // import the required package/class
/**
 * Initializes the current instance with the given context.
 *
 * Note: Do all initialization in this method, do not use the constructor.
 */
public void initialize(UimaContext context) throws ResourceInitializationException {

    super.initialize(context);

    TokenizerModel model;

    try {
        TokenizerModelResource modelResource =
                        (TokenizerModelResource) context.getResourceObject(UimaUtil.MODEL_PARAMETER);

        model = modelResource.getModel();
    } catch (ResourceAccessException e) {
        throw new ResourceInitializationException(e);
    }

    tokenizer = new TokenizerME(model);
}
 
Developer ID: deeplearning4j, Project: deeplearning4j, Lines of code: 23, Source: ConcurrentTokenizer.java


Note: The opennlp.tools.tokenize.TokenizerME class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.