当前位置: 首页>>代码示例>>Java>>正文


Java BreakIterator.getSentenceInstance方法代码示例

本文整理汇总了Java中java.text.BreakIterator.getSentenceInstance方法的典型用法代码示例。如果您正苦于以下问题:Java BreakIterator.getSentenceInstance方法的具体用法?Java BreakIterator.getSentenceInstance怎么用?Java BreakIterator.getSentenceInstance使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在java.text.BreakIterator的用法示例。


在下文中一共展示了BreakIterator.getSentenceInstance方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getBoundaryScanner

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Picks the boundary scanner to use for the given highlighted field.
 *
 * <p>For the {@code SENTENCE} and {@code WORD} scanner types a new
 * {@link BreakIterator}-backed scanner is created when the field options carry
 * a locale; without a locale the shared default scanner of that type is returned.
 * Every other scanner type yields a {@code SimpleBoundaryScanner}: a new one when
 * the configured max-scan or boundary characters differ from the defaults,
 * the shared default instance otherwise.
 *
 * @param field the field whose options drive the selection
 * @return the boundary scanner matching the field options
 */
private static BoundaryScanner getBoundaryScanner(Field field) {
    final FieldOptions options = field.fieldOptions();
    final Locale locale = options.boundaryScannerLocale();
    final boolean hasLocale = locale != null;
    switch (options.boundaryScannerType()) {
    case SENTENCE:
        return hasLocale
                ? new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(locale))
                : DEFAULT_SENTENCE_BOUNDARY_SCANNER;
    case WORD:
        return hasLocale
                ? new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(locale))
                : DEFAULT_WORD_BOUNDARY_SCANNER;
    default:
        // Both settings are compared with ==, exactly as configured values are
        // checked against the SimpleBoundaryScanner defaults.
        final boolean usesDefaults =
                options.boundaryMaxScan() == SimpleBoundaryScanner.DEFAULT_MAX_SCAN
                        && options.boundaryChars() == SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS;
        if (usesDefaults) {
            return DEFAULT_SIMPLE_BOUNDARY_SCANNER;
        }
        return new SimpleBoundaryScanner(options.boundaryMaxScan(), options.boundaryChars());
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:23,代码来源:FastVectorHighlighter.java

示例2: DocumentWordTokenizer

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Builds a tokenizer over the full text of a document and positions it on
 * the first word.
 *
 * <p>If the document text cannot be read, or no word start is found, the
 * tokenizer is marked as having no more tokens.
 *
 * @param document The document to spell check
 */
public DocumentWordTokenizer(Document document) {
  this.document = document;
  // One segment that will receive the entire document text
  text = new Segment();
  sentenceIterator = BreakIterator.getSentenceInstance();
  try {
    document.getText(0, document.getLength(), text);
    sentenceIterator.setText(text);
    // Start scanning at the segment's own begin index, which need not be 0
    currentWordPos = getNextWordStart(text, text.getBeginIndex());
    if (currentWordPos == -1) {
      // -1 is the "no word found" sentinel (per the original note: the text was all white space)
      moreTokens = false;
    } else {
      currentWordEnd = getNextWordEnd(text, currentWordPos);
      nextWordPos = getNextWordStart(text, currentWordEnd);
    }
  } catch (BadLocationException ex) {
    // The requested range could not be read: behave as an empty token stream
    moreTokens = false;
  }
}
 
开发者ID:Thecarisma,项目名称:powertext,代码行数:26,代码来源:DocumentWordTokenizer.java

示例3: splitSentences

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Splits a string into sentences by punctuation marks and line breaks.
 *
 * <p>The text is first cut at the sentence boundaries reported by a German
 * {@link BreakIterator}; every piece is then cut again at line breaks. Each
 * resulting line is trimmed, and lines that are empty after trimming are
 * dropped. All common line terminators are recognised, so text with Windows
 * line endings ({@code "\r\n"}) no longer yields entries that end in a stray
 * carriage return.
 *
 * @param text the text to be split; must not be {@code null}
 * @return the non-empty, trimmed sentences in their original order
 * @see java.text.BreakIterator#getSentenceInstance(Locale)
 */
private static String[] splitSentences(String text) {
    BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.GERMAN);
    iterator.setText(text);
    ArrayList<String> sentenceList = new ArrayList<>();
    int start = iterator.first();
    for (int end = iterator.next();
         end != BreakIterator.DONE;
         start = end, end = iterator.next()) {

        // \R treats "\r\n" as a single line break, so no '\r' is left behind
        for (String line : text.substring(start, end).split("\\R")) {
            String sentence = line.trim();
            // Exclude empty and whitespace-only lines
            if (!sentence.isEmpty()) {
                sentenceList.add(sentence);
            }
        }
    }
    return sentenceList.toArray(new String[0]);
}
 
开发者ID:AudiophileDev,项目名称:T2M,代码行数:32,代码来源:TextAnalyser.java

示例4: splitBySentence

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Cuts the given text at the sentence boundaries of the US locale.
 *
 * <p>The pieces are returned unmodified (no trimming), so concatenating them
 * reproduces the input.
 *
 * @param text the text to split
 * @return the sentences in order of appearance; empty when no boundary follows the first one
 */
private static String[] splitBySentence(String text) {
    // Locale.US is used because the customizer sets the default (US) locale text only
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.US);
    boundaries.setText(text);
    final List<String> pieces = new ArrayList<>();
    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        pieces.add(text.substring(from, to));
        from = to;
    }
    return pieces.toArray(new String[0]);
}
 
开发者ID:apache,项目名称:incubator-netbeans,代码行数:14,代码来源:LocalizedBundleInfo.java

示例5: testSingleSentences

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Checks that a {@code CustomSeparatorBreakIterator} built with a random
 * separator reports the same breaks as the root-locale sentence iterator
 * for short inputs without separators and for the empty string.
 */
public void testSingleSentences() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());
    for (String input : new String[] {"a", "ab", "abc", ""}) {
        assertSameBreaks(input, reference, candidate);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:CustomSeparatorBreakIteratorTests.java

示例6: testSliceEnd

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Compares both iterators on inputs that end in "000".
 *
 * <p>The two int arguments are presumably the slice offset and length expected
 * by {@code assertSameBreaks} - confirm against that helper.
 * NOTE(review): the first three inputs all use 1 as the second int, unlike the
 * growing values in the sibling slice tests; the values are kept unchanged here.
 */
public void testSliceEnd() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());
    final String[] inputs = {"a000", "ab000", "abc000", "000"};
    final int[] secondArgs = {1, 1, 1, 0};
    for (int i = 0; i < inputs.length; i++) {
        assertSameBreaks(inputs[i], 0, secondArgs[i], reference, candidate);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:CustomSeparatorBreakIteratorTests.java

示例7: testSliceStart

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Compares both iterators on inputs that start with "000".
 *
 * <p>The two int arguments are presumably the slice offset and length expected
 * by {@code assertSameBreaks} - confirm against that helper. The first is
 * always 3; the second is the number of characters after the "000" prefix.
 */
public void testSliceStart() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());
    final int prefixLength = 3; // length of the leading "000"
    for (String input : new String[] {"000a", "000ab", "000abc", "000"}) {
        assertSameBreaks(input, prefixLength, input.length() - prefixLength, reference, candidate);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:CustomSeparatorBreakIteratorTests.java

示例8: testSliceMiddle

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Compares both iterators on inputs wrapped in "000" on both sides.
 *
 * <p>The two int arguments are presumably the slice offset and length expected
 * by {@code assertSameBreaks} - confirm against that helper. The first is
 * always 3; the second is the number of characters between the two "000" runs.
 */
public void testSliceMiddle() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());
    final int paddingLength = 3; // length of each surrounding "000"
    for (String input : new String[] {"000a000", "000ab000", "000abc000", "000000"}) {
        assertSameBreaks(input, paddingLength, input.length() - 2 * paddingLength, reference, candidate);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:CustomSeparatorBreakIteratorTests.java

示例9: useSentenceIterator

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Prints every sentence of the given text on its own line of standard output.
 *
 * <p>Sentence boundaries come from the US-locale sentence {@link BreakIterator};
 * each piece is printed exactly as it appears in the source, including any
 * trailing whitespace.
 *
 * @param source the text to split into sentences
 */
public void useSentenceIterator(String source) {
	final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
	sentences.setText(source);
	int from = sentences.first();
	int to;
	while ((to = sentences.next()) != BreakIterator.DONE) {
		System.out.println(source.substring(from, to));
		from = to;
	}
}
 
开发者ID:PacktPublishing,项目名称:Java-Data-Science-Cookbook,代码行数:11,代码来源:SentenceDetection.java

示例10: DocLocale

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Creates the locale helper: stores the arguments, resolves the locale via
 * {@code getLocale()} and builds the collator and sentence breaker for it.
 *
 * <p>When a locale is resolved it is also installed as the JVM default
 * locale; when none is resolved, {@code docenv.exit()} is called.
 * NOTE(review): the collator and sentence breaker are still created after
 * {@code docenv.exit()} - presumably that call does not return; confirm.
 *
 * @param docenv the documentation environment
 * @param localeName the name of the locale to resolve
 * @param useBreakIterator value stored in the field of the same name
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;
    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }
    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
 
开发者ID:SunburstApps,项目名称:OpenJSharp,代码行数:17,代码来源:DocLocale.java

示例11: getSegmentAt

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Returns the segment around <code>index</code> that covers either the word
 * or the sentence, as selected by <code>part</code>
 * ({@code AccessibleText.WORD} or {@code AccessibleText.SENTENCE}).
 * Returns null when the paragraph text is unavailable, when
 * <code>part</code> is any other value, or when no valid boundary pair is
 * found inside the paragraph text. On success the segment's offset points
 * to the start of the word/sentence in the array, its count is the length
 * of the word/sentence, and its modelOffset points to the location of the
 * word/sentence in the model.
 */
private IndexedSegment getSegmentAt(int part, int index)
    throws BadLocationException {

    final IndexedSegment segment = getParagraphElementText(index);
    if (segment == null) {
        return null;
    }

    final BreakIterator breaker;
    if (part == AccessibleText.WORD) {
        breaker = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        breaker = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    segment.first();
    breaker.setText(segment);

    // Translate the model index into an index within the segment's array
    final int end = breaker.following(index - segment.modelOffset + segment.offset);
    // One past the last array index covered by the segment
    final int limit = segment.offset + segment.count;
    if (end == BreakIterator.DONE || end > limit) {
        return null;
    }

    final int begin = breaker.previous();
    if (begin == BreakIterator.DONE || begin >= limit) {
        return null;
    }

    // Narrow the segment to [begin, end) and shift the model offset accordingly
    segment.modelOffset += begin - segment.offset;
    segment.offset = begin;
    segment.count = end - begin;
    return segment;
}
 
开发者ID:AdoptOpenJDK,项目名称:openjdk-jdk10,代码行数:46,代码来源:AccessibleHTML.java

示例12: BreakIteratorTest

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Creates one break iterator of each kind (character, word, line and
 * sentence) for the default locale and stores them in the test's fields.
 */
public BreakIteratorTest() {
    this.characterBreak = BreakIterator.getCharacterInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.sentenceBreak = BreakIterator.getSentenceInstance();
}
 
开发者ID:AdoptOpenJDK,项目名称:openjdk-jdk10,代码行数:8,代码来源:BreakIteratorTest.java

示例13: BreakIteratorSentenceSplitter

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Creates a splitter whose sentence boundaries follow the rules of the
 * default locale.
 */
public BreakIteratorSentenceSplitter() {
    this.boundary = BreakIterator.getSentenceInstance();
}
 
开发者ID:takun2s,项目名称:smile_1.5.0_java7,代码行数:7,代码来源:BreakIteratorSentenceSplitter.java

示例14: testFirstPosition

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * The current position must be ignored: the initial position is always first().
 *
 * <p>The three int arguments are presumably slice offset, slice length and
 * the current position handed to {@code assertSameBreaks} - confirm against
 * that helper.
 */
public void testFirstPosition() throws Exception {
    final BreakIterator reference = BreakIterator.getSentenceInstance(Locale.ROOT);
    final BreakIterator candidate = new CustomSeparatorBreakIterator(randomSeparator());
    final String input = "000ab000";
    assertSameBreaks(input, 3, 2, 4, reference, candidate);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:7,代码来源:CustomSeparatorBreakIteratorTests.java

示例15: init

import java.text.BreakIterator; //导入方法依赖的package包/类
/**
 * Initializes the sentence iterator: creates a sentence {@link BreakIterator}
 * for the default locale and points it at the current text.
 */
protected void init() {
  this.sentenceIterator = BreakIterator.getSentenceInstance();
  this.sentenceIterator.setText(this.text);
}
 
开发者ID:Thecarisma,项目名称:powertext,代码行数:8,代码来源:AbstractWordFinder.java


注:本文中的java.text.BreakIterator.getSentenceInstance方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。