本文整理匯總了Java中opennlp.tools.sentdetect.SentenceDetectorME.sentDetect方法的典型用法代碼示例。如果您正苦於以下問題:Java SentenceDetectorME.sentDetect方法的具體用法?Java SentenceDetectorME.sentDetect怎麽用?Java SentenceDetectorME.sentDetect使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類opennlp.tools.sentdetect.SentenceDetectorME
的用法示例。
在下文中一共展示了SentenceDetectorME.sentDetect方法的14個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。
示例1: sentenceDetect
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Splits the given text into sentences using an OpenNLP sentence model
 * selected by this NLP instance's language (en_US or pt_BR). Models are
 * loaded from {@code <user.dir>/models/opennlp/...}.
 *
 * @param text the text to split into sentences
 * @return the detected sentences, or {@code null} if the language is
 *         unsupported or the model file could not be read
 */
public String[] sentenceDetect(String text) {
    File modelIn = null;
    String[] sentences = null;
    try {
        File userDir = new File(System.getProperty("user.dir"));
        String language = this.turNLPInstance.getLanguage();
        if ("en_US".equals(language)) {
            modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/en/en-sent.bin"));
        } else if ("pt_BR".equals(language)) {
            modelIn = new File(userDir.getAbsolutePath().concat("/models/opennlp/pt/pt-sent.bin"));
        }
        if (modelIn == null) {
            // Unsupported language: the original fell through and threw an
            // uncaught NullPointerException inside SentenceModel; return null
            // instead, matching the method's existing error contract.
            return null;
        }
        SentenceModel model = new SentenceModel(modelIn);
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
        sentences = sentenceDetector.sentDetect(text);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return sentences;
}
示例2: main
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Demo entry point: detects sentences in a short Alice-in-Wonderland
 * passage, printing each sentence and then the detected spans.
 */
public static void main(String[] strings) throws Exception {
    String text = "“But I don’t want to go among mad people,” Alice remarked. " +
            "“Oh, you can’t help that,” said the Cat: “we’re all mad here. I’m mad. You’re mad.” " +
            "“How do you know I’m mad?” said Alice. " +
            "“You must be,” said the Cat, “or you wouldn’t have come here.”";
    try (InputStream modelStream = new FileInputStream(NATURAL_LANGUAGE_PROCESSING_SRC_MAIN_RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(modelStream));
        String[] detected = detector.sentDetect(text);
        Span[] positions = detector.sentPosDetect(text);
        for (int i = 0; i < detected.length; i++) {
            System.out.println(detected[i]);
        }
        System.out.println(Arrays.deepToString(positions));
    }
}
示例3: parsePassageText
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Detects the sentences of a passage, tokenizes each with the simple
 * tokenizer, and returns the top-ranked parse for every sentence.
 *
 * @param p the passage text to parse
 * @return one top parse per detected sentence
 */
public static Parse[] parsePassageText(String p) throws InvalidFormatException {
    // beam size 20, advance percentage 0.95
    SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);
    Parser parser = ParserFactory.create(parserModel, 20, 0.95);
    String[] sentences = detector.sentDetect(p);
    Parse[] topParses = new Parse[sentences.length];
    for (int idx = 0; idx < sentences.length; idx++) {
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentences[idx]);
        String sent = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + sent);
        topParses[idx] = ParserTool.parseLine(sent, parser, 1)[0];
    }
    return topParses;
}
示例4: parsePassageText
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Detects the sentences of a passage, tokenizes each one, and returns the
 * top-ranked parse per sentence. Lazily initializes the models first.
 *
 * @param p the passage text to parse
 * @return one top parse per detected sentence
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException {
    if (!modelsAreInitialized) init();
    SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
    // beam size 20, advance percentage 0.95
    Parser parser = ParserFactory.create(this.parserModel, 20, 0.95);
    // find sentences, tokenize each, parse each, return top parse for each
    String[] sentences = detector.sentDetect(p);
    Parse[] topParses = new Parse[sentences.length];
    for (int idx = 0; idx < sentences.length; idx++) {
        // There are several tokenizers available; SimpleTokenizer works best
        String[] tokens = SimpleTokenizer.INSTANCE.tokenize(sentences[idx]);
        String sent = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + sent);
        topParses[idx] = ParserTool.parseLine(sent, parser, 1)[0];
    }
    return topParses;
}
示例5: splitToSentences
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Splits {@code text} into sentences using the OpenNLP sentence model
 * located on the classpath at {@code sentBin}.
 *
 * @param text the text to split
 * @return the detected sentences, or an empty list if the model could not be read
 */
public List<String> splitToSentences(String text) {
    List<String> sentences = new ArrayList<String>();
    // try-with-resources guarantees the model stream is closed even when
    // SentenceModel's constructor throws; the original only closed it on
    // the success path, leaking the stream on failure.
    try (InputStream modelIn = getClass().getResourceAsStream(sentBin)) {
        SentenceModel model = new SentenceModel(modelIn);
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
        for (String snt : sentenceDetector.sentDetect(text)) {
            sentences.add(snt);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return sentences;
}
示例6: tokenizeText
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Utility offered to other elements of the pipeline for text tokenizing:
 * splits the text into sentences, then tokenizes every sentence.
 *
 * @param text the text to tokenize
 * @param language the language of the input text
 * @return an array containing the tokenized text.
 */
public static String[] tokenizeText(String text, String language) {
    setup();
    // Split the text into sentences
    SentenceModel sentModel = getSentenceModel(language + "-sent");
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentModel);
    String[] sentences = sentenceDetector.sentDetect(text);
    // Get the right models
    TokenizerModel tokenModel = getTokenizerModel(language + "-token");
    // Build the tokenizer once; it is reused sequentially for every sentence
    // (the original re-constructed it inside the loop on every iteration).
    Tokenizer tokenizer = new TokenizerME(tokenModel);
    // Iterate through sentences and collect all tokens in order.
    List<String> tokenizedText = new ArrayList<>();
    for (String sentenceString : sentences) {
        // Tokenize the sentence
        for (String token : tokenizer.tokenize(sentenceString)) {
            tokenizedText.add(token);
        }
    }
    return tokenizedText.toArray(new String[tokenizedText.size()]);
}
示例7: testOpenNLP
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Runs OpenNLP sentence detection on {@code text} using the English
 * sentence model bundled in the resources directory.
 *
 * @param text the text to split into sentences
 * @return the detected sentences
 */
private static String[] testOpenNLP(String text) throws Exception {
    try (InputStream stream = new FileInputStream(RESOURCES_EN_SENT_BIN)) {
        SentenceDetectorME detector = new SentenceDetectorME(new SentenceModel(stream));
        return detector.sentDetect(text);
    }
}
示例8: SentenceDetect
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Demonstrates sentence detection: loads a sentence model from the given
 * path and prints the first two sentences of a fixed sample paragraph.
 *
 * @param sent_model path to a serialized OpenNLP sentence model file
 * @throws InvalidFormatException if the model file is malformed
 * @throws IOException if the model file cannot be read
 */
public static void SentenceDetect(String sent_model) throws InvalidFormatException, IOException {
    String paragraph = "Hi. How are you? This is Mike.";
    // always start with a model; a model is learned from training data.
    // try-with-resources closes the stream even when SentenceModel's
    // constructor throws — the original leaked the stream on that path.
    SentenceModel model;
    try (InputStream is = new FileInputStream(sent_model)) {
        model = new SentenceModel(is);
    }
    SentenceDetectorME sdetector = new SentenceDetectorME(model);
    String[] sentences = sdetector.sentDetect(paragraph);
    System.out.println(sentences[0]);
    System.out.println(sentences[1]);
}
示例9: testSentDetector
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Prints each sentence detected in {@code testSents} to stderr, one per
 * line, after initializing the models.
 *
 * @param testSents the text to split into sentences
 */
public void testSentDetector(String testSents) throws InvalidFormatException {
    init();
    SentenceDetectorME detector = new SentenceDetectorME(this.sentenceModel);
    for (String sentence : detector.sentDetect(testSents)) {
        System.err.println("sent: " + sentence);
    }
}
示例10: parsePassageText
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Detects the sentences of a passage, runs named-entity recognition over
 * each sentence's tokens (printing every name found), and returns the
 * top-ranked parse for each sentence. Lazily initializes the models.
 *
 * @param p the passage text to parse
 * @return one top parse per detected sentence
 */
public Parse[] parsePassageText(String p) throws InvalidFormatException {
    if (!modelsAreInitialized) init();
    // initialize
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
    NameFinderME nameFinder = new NameFinderME(this.nerModel);
    Parser parser = ParserFactory.create(
            this.parserModel,
            20,    // beam size
            0.95); // advance percentage
    // find sentences, tokenize each, run NER, parse each, return top parse for each
    String[] sentences = sentenceDetector.sentDetect(p);
    Parse[] results = new Parse[sentences.length];
    // There are several tokenizers available; SimpleTokenizer works best
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    for (int i = 0; i < sentences.length; i++) {
        Span[] tokenSpans = tokenizer.tokenizePos(sentences[i]);
        String[] tokens = Span.spansToStrings(tokenSpans, sentences[i]);
        // Print each name found in THIS sentence only. The original nested a
        // second loop over ALL sentences inside the outer loop, re-running
        // NER and printing every name sentences.length times.
        Span[] names = nameFinder.find(tokens);
        for (int ni = 0; ni < names.length; ni++) {
            int nameStart = tokenSpans[names[ni].getStart()].getStart();
            int nameEnd = tokenSpans[names[ni].getEnd() - 1].getEnd();
            System.out.println(sentences[i].substring(nameStart, nameEnd));
        }
        // BUG FIX: the original called StringUtils.join(tokenizer, " "),
        // joining the Tokenizer OBJECT rather than the sentence's tokens,
        // which fed garbage to the parser. Join the tokens instead.
        String sent = StringUtils.join(tokens, " ");
        System.out.println("Found sentence " + sent);
        Parse[] sentResults = ParserTool.parseLine(sent, parser, 1);
        results[i] = sentResults[0];
    }
    return results;
}
示例11: sentences
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Detects the sentences of {@code src} with the shared sentence model.
 *
 * @param src the text to split
 * @return the detected sentences; an empty array when {@code src} is empty
 */
public static String[] sentences(String src) {
    if (CommonUtil.isEmpty(src)) {
        return new String[0];
    }
    return new SentenceDetectorME(sentenceModel).sentDetect(src);
}
示例12: annotate
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Annotates the document using the Apache OpenNLP tools: splits the
 * component's text into sentences, tokenizes and POS-tags each one, and
 * appends a distilled {@code Sentence} to the component.
 *
 * @param blackboard the shared annotation blackboard
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {
    // set up the annotator
    setup();
    // Language tag used to retrieve the datasets
    String langTag = component.getLanguage().getLanguage();
    // Split the text into sentences
    SentenceModel sentModel = getSentenceModel(langTag + "-sent");
    SentenceDetectorME sentenceDetector = new SentenceDetectorME(sentModel);
    String[] sentences = sentenceDetector.sentDetect(component.getText());
    // Get the right models (local renamed from "POSModel", which shadowed the type name)
    TokenizerModel tokenModel = getTokenizerModel(langTag + "-token");
    POSModel posModel = getPOSTaggerModel(langTag + "-pos-maxent");
    // Build the tokenizer and tagger once; they are reused sequentially for
    // every sentence (the original re-constructed both inside the loop).
    Tokenizer tokenizer = new TokenizerME(tokenModel);
    POSTaggerME tagger = new POSTaggerME(posModel);
    // Iterate through sentences and produce the distilled objects,
    // i.e. a sentence object with pos-tagged and stemmed tokens.
    for (String sentenceString : sentences) {
        // the distilled sentence object
        Sentence sentence = new Sentence(sentenceString,
                "" + sentenceCounter++);
        sentence.setLanguage(component.getLanguage());
        // Tokenize the sentence
        String[] tokens = tokenizer.tokenize(sentenceString);
        // POS tag the tokens
        String[] tags = tagger.tag(tokens);
        // put the features detected by OpenNLP in the distiller's sentence
        for (int i = 0; i < tokens.length; i++) {
            Token t = new Token(tokens[i]);
            t.setPoS(tags[i]);
            sentence.addToken(t);
        } // for
        ((DocumentComposite) component).addComponent(sentence);
    } // for (String sentenceString : sentences)
}
示例13: convertToSentences
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Splits {@code text} into sentences using the shared sentence model.
 *
 * @param text the text to split
 * @return the detected sentences
 */
private String[] convertToSentences(String text) {
    return new SentenceDetectorME(sentenceModel).sentDetect(text);
}
示例14: DivideIntoSentences
import opennlp.tools.sentdetect.SentenceDetectorME; //導入方法依賴的package包/類
/**
 * Splits the passage's text into sentences, initializing the models first.
 *
 * @param p the passage whose text is split
 * @return the detected sentences
 */
public String[] DivideIntoSentences(Passage p) throws InvalidFormatException {
    init();
    return new SentenceDetectorME(this.sentenceModel).sentDetect(p.text);
}