This article collects typical usage examples of the Java class opennlp.tools.sentdetect.SentenceDetectorME. If you are wondering what SentenceDetectorME is for, how to use it, or where to find working examples, the hand-picked code samples below should help.
The SentenceDetectorME class belongs to the opennlp.tools.sentdetect package. Fifteen code examples of the class are shown below, sorted by popularity by default.
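Before the examples, here is a minimal self-contained sketch of the usual SentenceDetectorME workflow, assuming a pre-trained sentence model file named en-sent.bin on disk (adjust the path to whichever model you have): load a SentenceModel, wrap it in a SentenceDetectorME, then call sentDetect for the sentence strings, sentPosDetect for character spans, and getSentenceProbabilities for the confidences of the most recent detection.
import java.io.FileInputStream;
import java.io.InputStream;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.util.Span;

public class SentenceDetectorSketch {
    public static void main(String[] args) throws Exception {
        // Load a pre-trained sentence model (the file name is an assumption).
        try (InputStream in = new FileInputStream("en-sent.bin")) {
            SentenceModel model = new SentenceModel(in);
            SentenceDetectorME detector = new SentenceDetectorME(model);

            String text = "OpenNLP detects sentence boundaries. It returns one string per sentence.";
            String[] sentences = detector.sentDetect(text);        // detected sentences
            Span[] spans = detector.sentPosDetect(text);           // character offsets
            double[] probs = detector.getSentenceProbabilities();  // confidences of the last detection

            for (int i = 0; i < sentences.length; i++) {
                System.out.println(spans[i] + " " + probs[i] + " " + sentences[i]);
            }
        }
    }
}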
Example 1: OpenNLP
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
/**
* Private constructor to allow only one {@link OpenNLP} for each Thread
*
* @throws IllegalStateException if an error occurred from {@link LoaderNLP} or {@link PropertiesManager}
*/
private OpenNLP() {
try {
detector = new SentenceDetectorME(LoaderNLP.getSentenceModel());
tokenizer = new TokenizerME(LoaderNLP.getTokenizerModel());
tagger = new POSTaggerME(LoaderNLP.getPosModel());
nameFinderOrg = new NameFinderME(LoaderNLP.getTokenNameFinderModelOrg());
nameFinderLoc = new NameFinderME(LoaderNLP.getTokenNameFinderModelLoc());
nameFinderPers = new NameFinderME(LoaderNLP.getTokenNameFinderModelPers());
InputStream inputStream = new FileInputStream(PROPERTIES_MANAGER.getProperty("nlp.dictionaries.path"));
lemmatizer = new SimpleLemmatizer(inputStream);
inputStream.close();
} catch (IllegalArgumentException | IOException e) {
LOGGER.error(e.getMessage());
throw new IllegalStateException(e);
}
}
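The Javadoc above notes that the private constructor exists so that each thread gets its own OpenNLP instance. A per-thread accessor for such a design could look like the sketch below; the INSTANCE field and getOpenNLP() method are hypothetical names, not taken from the original project.
// Hypothetical per-thread holder matching the private constructor above.
private static final ThreadLocal<OpenNLP> INSTANCE = ThreadLocal.withInitial(OpenNLP::new);

/** Returns the OpenNLP instance bound to the calling thread. */
public static OpenNLP getOpenNLP() {
    return INSTANCE.get();
}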
Example 2: sentenceDetect
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public String[] sentenceDetect(String text) {
File modelIn = null;
String[] sentences = null;
try {
File userDir = new File(System.getProperty("user.dir"));
if (this.turNLPInstance.getLanguage().equals("en_US")) {
modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/en/en-sent.bin"));
} else if (this.turNLPInstance.getLanguage().equals("pt_BR")) {
modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/pt/pt-sent.bin"));
}
SentenceModel model = new SentenceModel(modelIn);
SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
sentences = sentenceDetector.sentDetect(text);
} catch (IOException e) {
e.printStackTrace();
}
return sentences;
}
Example 3: trainSentences
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public static void trainSentences(final String inResource, String outFile) throws IOException {
InputStreamFactory inputStreamFactory = new InputStreamFactory() {
@Override
public InputStream createInputStream() throws IOException {
return Trainer.class.getResourceAsStream(inResource);
}
};
SentenceSampleStream samples = new SentenceSampleStream(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8));
TrainingParameters trainingParameters = new TrainingParameters();
trainingParameters.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
trainingParameters.put(TrainingParameters.ITERATIONS_PARAM, "100");
trainingParameters.put(TrainingParameters.CUTOFF_PARAM, "0");
SentenceDetectorFactory sentenceDetectorFactory = SentenceDetectorFactory.create(null, "en", true, null, ".?!".toCharArray());
SentenceModel sentdetectModel = SentenceDetectorME.train("en", samples, sentenceDetectorFactory, trainingParameters);
//.train("en", samples, true, null, 100, 0);
samples.close();
FileOutputStream out = new FileOutputStream(outFile);
sentdetectModel.serialize(out);
out.close();
}
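With the trained model in hand, a natural next step is to measure it against held-out data. The sketch below reuses sentdetectModel from the example above and assumes a hypothetical classpath resource /eval-sentences.txt in the same one-sentence-per-line format; it also needs opennlp.tools.sentdetect.SentenceDetectorEvaluator and opennlp.tools.util.ObjectStream on the import list, so check the evaluator's signature against your OpenNLP version.
// Sketch: evaluate the freshly trained model on held-out sentences.
ObjectStream<SentenceSample> evalSamples = new SentenceSampleStream(
        new PlainTextByLineStream(
                () -> Trainer.class.getResourceAsStream("/eval-sentences.txt"),  // hypothetical held-out data
                StandardCharsets.UTF_8));

SentenceDetectorEvaluator evaluator =
        new SentenceDetectorEvaluator(new SentenceDetectorME(sentdetectModel));
evaluator.evaluate(evalSamples);
evalSamples.close();

System.out.println(evaluator.getFMeasure());  // prints precision, recall and F-measure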
Example 4: init
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
/**
 * Initialization method. Creates the OpenNLP sentence detector and tokenizer and loads the stopword lists.
 *
 * @param sent sentence detector model stream
 * @param token tokenizer model stream
 * @param stop stopword list stream
 * @param exstop extended stopword list stream
 * @throws IOException if a model or word list cannot be read
 */
private void init(InputStream sent, InputStream token, InputStream stop, InputStream exstop) throws IOException {
// creates a new sentence detector and tokenizer from the provided model streams
SentenceModel sentModel = new SentenceModel(sent);
sent.close();
sdetector = new SentenceDetectorME(sentModel);
TokenizerModel tokenModel = new TokenizerModel(token);
token.close();
tokenizer = new TokenizerME(tokenModel);
BufferedReader br = new BufferedReader(new InputStreamReader(stop));
String line;
while ((line = br.readLine()) != null) {
stopwords.add(line);
}
br.close();
br = new BufferedReader(new InputStreamReader(exstop));
while ((line = br.readLine()) != null) {
extendedStopwords.add(line);
}
br.close();
}
Example 5: doInitialize
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
try {
tokensModel.loadModel(TokenizerModel.class, getClass().getResourceAsStream("en_token.bin"));
sentencesModel.loadModel(SentenceModel.class, getClass().getResourceAsStream("en_sent.bin"));
posModel.loadModel(POSModel.class, getClass().getResourceAsStream("en_pos_maxent.bin"));
chunkModel.loadModel(ChunkerModel.class, getClass().getResourceAsStream("en_chunker.bin"));
} catch (BaleenException be) {
getMonitor().error("Unable to load OpenNLP Language Models", be);
throw new ResourceInitializationException(be);
}
try {
sentenceDetector = new SentenceDetectorME((SentenceModel) sentencesModel.getModel());
wordTokenizer = new TokenizerME((TokenizerModel) tokensModel.getModel());
posTagger = new POSTaggerME((POSModel) posModel.getModel());
phraseChunker = new ChunkerME((ChunkerModel) chunkModel.getModel());
} catch (Exception e) {
getMonitor().error("Unable to create OpenNLP taggers", e);
throw new ResourceInitializationException(e);
}
}
Example 6: main
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public static void main(String[] strings) throws Exception {
String text = "“But I don’t want to go among mad people,” Alice remarked. " +
"“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” " +
"“How do you know I’m mad?” said Alice. " +
"“You must be,” said the Cat, “or you wouldn’t have come here.”";
try (InputStream modelIn = new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
SentenceModel model = new SentenceModel(modelIn);
SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
String[] sentences = sentenceDetector.sentDetect(text);
Span[] sentences2 = sentenceDetector.sentPosDetect(text);
for (String sentence : sentences) {
System.out.println(sentence);
}
System.out.println(Arrays.deepToString(sentences2));
}
}
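sentPosDetect returns opennlp.tools.util.Span objects rather than strings, which matters when you need character offsets into the original text (for example, to keep annotations aligned). A small follow-up sketch using text and sentences2 from the example above:
// Each Span carries the [start, end) character offsets of one sentence.
for (Span span : sentences2) {
    CharSequence covered = span.getCoveredText(text);  // substring of the original text
    System.out.println(span.getStart() + "-" + span.getEnd() + ": " + covered);
}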
Example 7: parsePassageText
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public static Parse[] parsePassageText(String p) throws InvalidFormatException{
//initialize
SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentenceModel);
Parser parser = ParserFactory.create(
parserModel,
20, // beam size
0.95); // advance percentage
String[] sentences = sentenceDetector.sentDetect(p);
Parse[] results = new Parse[sentences.length];
for (int i=0;i<sentences.length;i++){
String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
String sent= StringUtils.join(tks," ");
System.out.println("Found sentence " + sent);
Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
results[i]=sentResults[0];
}
return results;
}
Example 8: parsePassageText
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public Parse[] parsePassageText(String p) throws InvalidFormatException{
if (!modelsAreInitialized)init();
//initialize
SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
Parser parser = ParserFactory.create(
this.parserModel,
20, // beam size
0.95); // advance percentage
//find sentences, tokenize each, parse each, return top parse for each
String[] sentences = sentenceDetector.sentDetect(p);
Parse[] results = new Parse[sentences.length];
for (int i=0;i<sentences.length;i++){
String[] tks = SimpleTokenizer.INSTANCE.tokenize(sentences[i]);
//StringTokenizer st = new StringTokenizer(tks[i]);
//There are several tokenizers available. SimpleTokenizer works best
String sent= StringUtils.join(tks," ");
System.out.println("Found sentence " + sent);
Parse[] sentResults = ParserTool.parseLine(sent,parser, 1);
results[i]=sentResults[0];
}
return results;
}
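ParserTool.parseLine returns opennlp.tools.parser.Parse objects; calling show() on a parse prints it in bracketed, Penn Treebank-like form, which is a quick way to inspect what examples 7 and 8 return. A minimal follow-up fragment (inside a method that handles the declared InvalidFormatException):
// Print each top-ranked parse returned by parsePassageText.
Parse[] parses = parsePassageText("The cat sat on the mat. The dog barked.");
for (Parse parse : parses) {
    parse.show();  // writes the bracketed parse tree to System.out
}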
Example 9: segmentEssay
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public void segmentEssay(Essay essay) {
try {
smodel = new FileInputStream(
System.getProperty("user.dir")
+ "/Models/en-sent.bin"
);
SentenceModel model = new SentenceModel(smodel);
SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
ArrayList<String> newsent = new ArrayList<>();
for (int i = 0; i < essay.getSentences().size(); i++) {
newsent.addAll(asList(sentenceDetector.sentDetect(essay.getSentences().get(i))));
}
essay.setDetectedSentences(newsent);
} catch (IOException e) {
e.printStackTrace();
} finally {
if (smodel != null) {
try {
smodel.close();
} catch (IOException ignored) {
}
}
}
}
Example 10: startStage
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
@Override
public void startStage(StageConfiguration config) {
// parse the config to map the params properly
textField = config.getProperty("textField", textField);
peopleField = config.getProperty("peopleField", peopleField);
posTextField = config.getProperty("posTextField", posTextField);
try {
// Sentence finder
SentenceModel sentModel = new SentenceModel(new FileInputStream(sentenceModelFile));
sentenceDetector = new SentenceDetectorME(sentModel);
// tokenizer
TokenizerModel tokenModel = new TokenizerModel(new FileInputStream(tokenModelFile));
tokenizer = new TokenizerME(tokenModel);
// person name finder
TokenNameFinderModel nameModel = new TokenNameFinderModel(new FileInputStream(personModelFile));
nameFinder = new NameFinderME(nameModel);
// load the part of speech tagger.
posTagger = new POSTaggerME(new POSModel(new FileInputStream(posModelFile)));
} catch (IOException e) {
log.info("Error loading up OpenNLP Models. {}", e.getLocalizedMessage());
e.printStackTrace();
}
}
Example 11: exec
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public DataBag exec(Tuple input) throws IOException
{
if(input.size() != 1) {
throw new IOException();
}
String inputString = input.get(0).toString();
if (inputString == null || inputString.isEmpty()) {
return null;
}
DataBag outBag = bf.newDefaultBag();
if(sdetector == null) {
String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
InputStream is = new FileInputStream(loadFile);
InputStream buffer = new BufferedInputStream(is);
SentenceModel model = new SentenceModel(buffer);
this.sdetector = new SentenceDetectorME(model);
}
String[] sentences = this.sdetector.sentDetect(inputString);
for(String sentence : sentences) {
Tuple outTuple = tf.newTuple(sentence);
outBag.add(outTuple);
}
return outBag;
}
Example 12: inform
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
@Override
public void inform(ResourceLoader loader) throws IOException {
if(sentenceModelFile!=null) {
sentenceOp = new SentenceDetectorME(new SentenceModel(
loader.openResource(sentenceModelFile)));
}
if (tokenizerModelFile == null)
    throw new IOException("Parameter 'tokenizerModel' is required, but is invalid: " + tokenizerModelFile);
tokenizerOp = new TokenizerME(new TokenizerModel(
loader.openResource(tokenizerModelFile)
));
if(parChunkingClass!=null) {
try {
Class c = Class.forName(parChunkingClass);
Object o = c.newInstance();
paragraphChunker = (ParagraphChunker) o;
}catch (Exception e){
throw new IOException(e);
}
}
}
Example 13: initialize
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public static void initialize() throws IOException {
/* normal model */
/*
model = new POSModelLoader().load(new File(RESOURCES + "pt.postagger.model"));
tModel = new TokenizerModel(new FileInputStream(RESOURCES + "pt.tokenizer.model"));
sModel = new SentenceModel(new FileInputStream(RESOURCES + "pt.sentdetect.model"));
*/
/* with VPP tag */
model = new POSModelLoader().load(new File(RESOURCES + "pt.postaggerVerbPP.model"));
tModel = new TokenizerModel(new FileInputStream(RESOURCES + "pt.tokenizerVerbPP.model"));
sModel = new SentenceModel(new FileInputStream(RESOURCES + "pt.sentDetectVerbPP.model"));
tagger = new POSTaggerME(model);
token = new TokenizerME(tModel);
sent = new SentenceDetectorME(sModel);
}
Example 14: splitToSentences
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
public List<String> splitToSentences(String text) {
List<String> sentences = new ArrayList<String>();
//List<String> returnedSentences = new ArrayList<String>();
try {
InputStream modelIn = getClass().getResourceAsStream(sentBin);
SentenceModel model = new SentenceModel(modelIn);
SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
String[] initSentences = sentenceDetector.sentDetect(text);
for(String snt : initSentences){
sentences.add(snt);
}
modelIn.close();
} catch (IOException e) {
e.printStackTrace();
}
return sentences;
}
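Example 14 re-reads the model from the classpath on every call. The loaded model is the expensive, shareable part, while SentenceDetectorME itself is cheap to create and not intended to be shared across threads, so a common variation is to cache the SentenceModel once. A sketch of that variation, reusing the sentBin field from the example (the cachedModel field and sentenceModel() method are made-up names):
// Cache the model; creating a SentenceDetectorME per call stays cheap.
// A benign race may load the model twice, but the result is the same either way.
private static volatile SentenceModel cachedModel;

private SentenceModel sentenceModel() throws IOException {
    if (cachedModel == null) {
        try (InputStream modelIn = getClass().getResourceAsStream(sentBin)) {
            cachedModel = new SentenceModel(modelIn);
        }
    }
    return cachedModel;
}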
Example 15: loadSentenceDetector
import opennlp.tools.sentdetect.SentenceDetectorME; // import the required package/class
/** Load the sentence detector
*
* @param language
* @param modelDirectory
* @throws IOException
*/
protected void loadSentenceDetector(String language, String modelDirectory) throws IOException {
String modelFile = modelDirectory +
File.separatorChar + language + "-sent.bin";
log.info("Loading sentence model {}", modelFile);
try (InputStream modelStream = new FileInputStream(modelFile)) {
    SentenceModel model = new SentenceModel(modelStream);
    detector = new SentenceDetectorME(model);
}
}