本文整理汇总了Java中com.ibm.icu.text.RuleBasedBreakIterator类的典型用法代码示例。如果您正苦于以下问题：Java RuleBasedBreakIterator类的具体用法？Java RuleBasedBreakIterator怎么用？Java RuleBasedBreakIterator使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
RuleBasedBreakIterator类属于com.ibm.icu.text包,在下文中一共展示了RuleBasedBreakIterator类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: EmojiTokenizerFactory
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Builds the factory and installs a tokenizer config whose break iterator is
 * ICU's word iterator with two textual patches applied to its rule source.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param environment   not used by this constructor
 * @param name          forwarded to the superclass constructor
 * @param settings      forwarded to the superclass constructor
 */
public EmojiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    config = new DefaultICUTokenizerConfig(true, true) {
        @Override
        public BreakIterator getBreakIterator(int script) {
            // Take the rule source of the word iterator for the JVM's default locale.
            BreakIterator stockWordIterator = BreakIterator.getWordInstance(Locale.getDefault());
            String stockRules = ((RuleBasedBreakIterator) stockWordIterator).toString();

            // Patch 1: add an $EmojiNRK rule tagged {200} right after the forward-rules marker.
            // Patch 2: tag the rule ending in "| $ZWJ)*" with {200} as well.
            // NOTE(review): both patches are plain text substitutions and silently do
            // nothing if the ICU rule text does not contain these exact fragments.
            String emojiAwareRules = stockRules
                .replace("!!forward;", "!!forward;\n$EmojiNRK {200};")
                .replace("| $ZWJ)*;", "| $ZWJ)* {200};");

            return new RuleBasedBreakIterator(emojiAwareRules);
        }
    };
}
示例2: calcStatus
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Derives a word rule status for the text between two break positions by
 * scanning its code points: the first digit found yields WORD_NUMBER, the
 * first letter found yields WORD_LETTER.
 *
 * @param current start of the segment, relative to {@code start}, or BreakIterator.DONE
 * @param next    end of the segment (exclusive), relative to {@code start}, or BreakIterator.DONE
 * @return WORD_NUMBER or WORD_LETTER for the first digit/letter encountered;
 *         WORD_NONE if either position is DONE or the segment has neither
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    // Advance by the UTF-16 length of each code point so surrogate pairs are read whole.
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor. Reading at 'begin' would re-examine the first
        // code point on every pass and never look at the rest of the segment.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
示例3: calcStatus
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Derives a word rule status for the text between two break positions by
 * scanning its code points: the first digit found yields WORD_NUMBER, the
 * first letter found yields WORD_LETTER.
 *
 * @param current start of the segment, relative to {@code start}, or BreakIterator.DONE
 * @param next    end of the segment (exclusive), relative to {@code start}, or BreakIterator.DONE
 * @return WORD_NUMBER or WORD_LETTER for the first digit/letter encountered;
 *         WORD_NONE if either position is DONE or the segment has neither
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    // Advance by the UTF-16 length of each code point so surrogate pairs are read whole.
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor. Reading at 'begin' would re-examine the first
        // code point on every pass and never look at the rest of the segment.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            // TODO: try to separately specify ideographic, kana?
            // [currently all bundled as letter for this case]
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
示例4: maybeLoad
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Returns the cached line break iterator, creating and caching it on first use.
 *
 * When a compiled-rules resource is located, the iterator is built from it and
 * an informational message is logged; when none is located, ICU's character
 * instance is used instead.
 *
 * @param reporter receives the informational message after rules are loaded
 * @return the cached or newly created iterator, or null if loading the rules
 *         failed with an IOException
 */
private LineBreakIterator maybeLoad(Reporter reporter) {
    LineBreakIterator cached = this.iterator;
    if (cached != null) {
        return cached;
    }

    BreakIterator delegate = null;
    InputStream rulesStream = null;
    try {
        URL rulesLocator = getRulesLocator(name, RULES_BINARY_EXT);
        if (rulesLocator == null) {
            // No compiled rules available: fall back to the character iterator.
            delegate = BreakIterator.getCharacterInstance();
        } else {
            rulesStream = rulesLocator.openStream();
            delegate = RuleBasedBreakIterator.getInstanceFromCompiledRules(rulesStream);
            reporter.logInfo(reporter.message("*KEY*", "Loaded rules based break iterator from ''{0}''.", rulesLocator.toString()));
        }
    } catch (IOException ignored) {
        // Best effort: a failed load leaves 'delegate' null and the caller gets null.
    } finally {
        IOUtil.closeSafely(rulesStream);
    }

    if (delegate == null) {
        return null;
    }
    LineBreakIterator created = new LineBreakIterator(delegate);
    this.iterator = created;
    return created;
}
示例5: parseRules
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Reads a break-rule file from the configuration directory and builds a
 * rule-based break iterator from it.
 *
 * Lines that start with '#' are dropped; the remaining lines are joined with
 * a single '\n' between them (no trailing newline).
 *
 * @param filename rule file name, resolved against {@code env.configFile()}
 * @param env      environment providing the configuration directory
 * @return a break iterator compiled from the filtered rule text
 * @throws IOException if the file cannot be read
 */
private BreakIterator parseRules(String filename, Environment env) throws IOException {
    final Path rulesFile = env.configFile().resolve(filename);
    final StringBuilder ruleSource = new StringBuilder();
    boolean needsSeparator = false;
    for (String line : Files.readAllLines(rulesFile)) {
        if (line.startsWith("#")) {
            continue;
        }
        if (needsSeparator) {
            ruleSource.append("\n");
        }
        ruleSource.append(line);
        needsSeparator = true;
    }
    return new RuleBasedBreakIterator(ruleSource.toString());
}
示例6: getType
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Maps a break-iterator rule status, refined by script where needed, to a
 * token type string.
 *
 * @param script     script code, used to split kana into hiragana/katakana and
 *                   to single out Hangul among letters
 * @param ruleStatus rule status reported by the break iterator
 * @return the matching token type, or "<OTHER>" for any unrecognized status
 */
@Override
public String getType(int script, int ruleStatus) {
    if (ruleStatus == RuleBasedBreakIterator.WORD_IDEO) {
        return WORD_IDEO;
    }
    if (ruleStatus == RuleBasedBreakIterator.WORD_KANA) {
        if (script == UScript.HIRAGANA) {
            return WORD_HIRAGANA;
        }
        return WORD_KATAKANA;
    }
    if (ruleStatus == RuleBasedBreakIterator.WORD_LETTER) {
        if (script == UScript.HANGUL) {
            return WORD_HANGUL;
        }
        return WORD_LETTER;
    }
    if (ruleStatus == RuleBasedBreakIterator.WORD_NUMBER) {
        return WORD_NUMBER;
    }
    // Any other (custom) status code.
    return "<OTHER>";
}
示例7: readBreakIterator
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Loads a precompiled break iterator from a classpath resource under
 * {@code /org/apache/lucene/analysis/icu/segmentation/}.
 *
 * @param filename name of the compiled rules resource inside that package path
 * @return the break iterator built from the compiled rules
 * @throws RuntimeException wrapping any IOException raised while reading or
 *         closing the resource
 */
private static RuleBasedBreakIterator readBreakIterator(String filename) {
    // try-with-resources closes the stream even when rule loading throws;
    // previously the stream was only closed on the success path.
    try (InputStream is = DefaultIcuTokenizerConfig.class.getResourceAsStream("/org/apache/lucene/analysis/icu/segmentation/" + filename)) {
        return RuleBasedBreakIterator.getInstanceFromCompiledRules(is);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
示例8: wrap
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Chooses the wrapper for a break iterator. A RuleBasedBreakIterator exposes a
 * rule status that can serve as the token type, so it gets an RBBIWrapper; any
 * other BreakIterator has no rule status and is wrapped generically.
 */
static BreakIteratorWrapper wrap(BreakIterator breakIterator) {
    return (breakIterator instanceof RuleBasedBreakIterator)
        ? new RBBIWrapper((RuleBasedBreakIterator) breakIterator)
        : new BIWrapper(breakIterator);
}
示例9: wrap
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Chooses the wrapper for a break iterator. A RuleBasedBreakIterator exposes a
 * rule status that can serve as the token type, so it gets an RBBIWrapper; any
 * other BreakIterator has no rule status and is wrapped generically.
 */
static BreakIteratorWrapper wrap(BreakIterator breakIterator) {
    if (!(breakIterator instanceof RuleBasedBreakIterator)) {
        // No rule status available: generic wrapper.
        return new BIWrapper(breakIterator);
    }
    RuleBasedBreakIterator ruleBased = (RuleBasedBreakIterator) breakIterator;
    return new RBBIWrapper(ruleBased);
}
示例10: parseRules
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Reads break rules from a resource (decoded as UTF-8) and builds a rule-based
 * break iterator from them.
 *
 * Lines starting with '#' contribute no text, but a newline is still appended
 * for every input line, so line numbers in the assembled rule text match the
 * resource.
 *
 * @param filename name of the rules resource
 * @param loader   loader used to open the resource
 * @return a break iterator compiled from the assembled rule text
 * @throws IOException if the resource cannot be opened or read
 */
private BreakIterator parseRules(String filename, ResourceLoader loader) throws IOException {
    StringBuilder rules = new StringBuilder();
    // try-with-resources releases the stream and reader even if reading fails;
    // previously the reader was only closed on the success path.
    try (InputStream rulesStream = loader.openResource(filename);
         BufferedReader reader = new BufferedReader(
             IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (!line.startsWith("#")) {
                rules.append(line);
            }
            // Appended for comment lines too (see method comment).
            rules.append('\n');
        }
    }
    return new RuleBasedBreakIterator(rules.toString());
}
示例11: getType
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Translates a rule status (and, for kana and letters, the script) into the
 * token type string.
 *
 * @param script     script code consulted for the kana and letter statuses
 * @param ruleStatus rule status reported by the break iterator
 * @return the token type; "<OTHER>" when the status is none of the known ones
 */
@Override
public String getType(int script, int ruleStatus) {
    final String tokenType;
    switch (ruleStatus) {
        case RuleBasedBreakIterator.WORD_IDEO:
            tokenType = WORD_IDEO;
            break;
        case RuleBasedBreakIterator.WORD_KANA:
            if (script == UScript.HIRAGANA) {
                tokenType = WORD_HIRAGANA;
            } else {
                tokenType = WORD_KATAKANA;
            }
            break;
        case RuleBasedBreakIterator.WORD_LETTER:
            if (script == UScript.HANGUL) {
                tokenType = WORD_HANGUL;
            } else {
                tokenType = WORD_LETTER;
            }
            break;
        case RuleBasedBreakIterator.WORD_NUMBER:
            tokenType = WORD_NUMBER;
            break;
        default:
            // Some other custom status code.
            tokenType = "<OTHER>";
            break;
    }
    return tokenType;
}
示例12: readBreakIterator
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Loads a precompiled break iterator from a classpath resource resolved
 * relative to DefaultICUTokenizerConfig.
 *
 * @param filename name of the compiled rules resource
 * @return the break iterator built from the compiled rules
 * @throws RuntimeException wrapping any IOException raised while reading or
 *         closing the resource
 */
private static RuleBasedBreakIterator readBreakIterator(String filename) {
    // try-with-resources closes the stream even when rule loading throws;
    // previously the stream was only closed on the success path.
    try (InputStream is = DefaultICUTokenizerConfig.class.getResourceAsStream(filename)) {
        return RuleBasedBreakIterator.getInstanceFromCompiledRules(is);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
示例13: compile
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Compiles every file in {@code srcDir} whose name ends in "rbbi" into a
 * binary rules file in {@code destDir}, named by replacing the trailing
 * "rbbi" with "brk". Progress is written to standard error.
 *
 * If a rule file fails to parse, the parser's message is printed and the JVM
 * exits with status 1.
 *
 * @param srcDir  directory containing the rule source files
 * @param destDir directory receiving the compiled files
 * @throws IOException if {@code srcDir} cannot be listed
 * @throws Exception   for any failure while reading rules or writing output
 */
static void compile(File srcDir, File destDir) throws Exception {
    File[] files = srcDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith("rbbi");
        }
    });
    if (files == null) throw new IOException("Path does not exist: " + srcDir);
    for (File file : files) {
        File outputFile = new File(destDir,
            file.getName().replaceAll("rbbi$", "brk"));
        String rules = getRules(file);
        System.err.print("Compiling " + file.getName() + " to "
            + outputFile.getName() + ": ");
        /*
         * if there is a syntax error, compileRules() may succeed. the way to
         * check is to try to instantiate from the string. additionally if the
         * rules are invalid, you can get a useful syntax error.
         */
        try {
            new RuleBasedBreakIterator(rules);
        } catch (IllegalArgumentException e) {
            /*
             * do this intentionally, so you don't get a massive stack trace
             * instead, get a useful syntax error!
             */
            System.err.println(e.getMessage());
            System.exit(1);
        }
        // try-with-resources closes the output file even if compilation throws;
        // previously the stream was only closed on the success path.
        try (FileOutputStream os = new FileOutputStream(outputFile)) {
            RuleBasedBreakIterator.compileRules(rules, os);
        }
        System.err.println(outputFile.length() + " bytes.");
    }
}
示例14: main
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Command-line entry point: reads break rules from an input file and writes
 * the compiled form to an output file.
 *
 * Expects exactly two arguments (input path, output path); otherwise prints a
 * usage line to standard error. I/O failures are reported on standard error
 * instead of being silently discarded.
 *
 * @param args input file path followed by output file path
 */
public static void main(String[] args) {
    if (args.length == 2) {
        String inputFilePath = args[0];
        String outputFilePath = args[1];
        InputStream is = null;
        OutputStream os = null;
        BufferedReader r = null;
        try {
            is = new FileInputStream(inputFilePath);
            os = new FileOutputStream(outputFilePath);
            r = new BufferedReader(new InputStreamReader(is, defaultInputEncoding));
            // Local, single-threaded accumulation: StringBuilder suffices.
            StringBuilder rules = new StringBuilder();
            String line;
            while ((line = r.readLine()) != null) {
                rules.append(line);
                rules.append('\n');
            }
            RuleBasedBreakIterator.compileRules(rules.toString(), os);
        } catch (IOException e) {
            // Tell the user the compilation failed rather than exiting quietly.
            System.err.println("Failed to compile rules from '" + inputFilePath
                + "' to '" + outputFilePath + "': " + e);
        } finally {
            IOUtil.closeSafely(r);
            IOUtil.closeSafely(os);
            IOUtil.closeSafely(is);
        }
    } else {
        System.err.println("Usage: java -cp ... com.skynav.ttpe.text.LineBreaker [INPUT-FILE-PATH] [OUTPUT-FILE-PATH]");
    }
}
示例15: clone
import com.ibm.icu.text.RuleBasedBreakIterator; //导入依赖的package包/类
/**
 * Creates a copy of this LaoBreakIterator. The {@code rules} and
 * {@code verify} iterators are always cloned; {@code text},
 * {@code working} and {@code verifyText} are cloned only when non-null.
 *
 * @return the copy
 */
@Override
public LaoBreakIterator clone() {
    LaoBreakIterator copy = (LaoBreakIterator) super.clone();

    // Give the copy its own iterators rather than sharing them with this instance.
    copy.rules = (RuleBasedBreakIterator) rules.clone();
    copy.verify = (RuleBasedBreakIterator) verify.clone();

    // The remaining fields may be unset, so clone them only when present.
    if (text != null) {
        copy.text = text.clone();
    }
    if (working != null) {
        copy.working = working.clone();
    }
    if (verifyText != null) {
        copy.verifyText = verifyText.clone();
    }
    return copy;
}