本文整理汇总了Java中org.apache.lucene.analysis.util.CharArraySet.copy方法的典型用法代码示例。如果您正苦于以下问题:Java CharArraySet.copy方法的具体用法?Java CharArraySet.copy怎么用?Java CharArraySet.copy使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.analysis.util.CharArraySet
的用法示例。
在下文中一共展示了CharArraySet.copy方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: WordDelimiterTokenFilterFactory
import org.apache.lucene.analysis.util.CharArraySet; //导入方法依赖的package包/类
@Inject
public WordDelimiterTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings);
// Sample Format for the type table:
// $ => DIGIT
// % => DIGIT
// . => DIGIT
// \u002C => DIGIT
// \u200D => ALPHANUM
List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
if (charTypeTableValues == null) {
this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
} else {
this.charTypeTable = parseTypes(charTypeTableValues);
}
int flags = 0;
// If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
// If set, causes number subwords to be generated: "500-42" => "500" "42"
flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
// 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
// If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
// If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
// 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
// If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
// 1, causes "j2se" to be three tokens; "j" "2" "se"
flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
// If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
// If not null is the set of tokens to protect from being delimited
Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
this.flags = flags;
}