This article collects typical usage examples of the Java class org.apache.lucene.analysis.TokenStream. If you are asking yourself what the TokenStream class is for, how to use it, or where to find working examples of it, the curated samples below should help.
The TokenStream class belongs to the org.apache.lucene.analysis package. Fifteen code examples are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site surface better Java code samples.
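Before diving into the examples, here is a minimal, self-contained sketch of the canonical TokenStream consume loop (reset → incrementToken → end → close) that nearly all of the examples below follow. WhitespaceAnalyzer and the field name "field" are placeholder choices, and the no-arg analyzer constructor assumes Lucene 5.x or later:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamDemo {
    public static void main(String[] args) throws IOException {
        // WhitespaceAnalyzer is only a stand-in; any Analyzer works the same way.
        try (Analyzer analyzer = new WhitespaceAnalyzer();
             TokenStream ts = analyzer.tokenStream("field", "hello token stream")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                          // mandatory before the first incrementToken()
            while (ts.incrementToken()) {        // advance to the next token
                System.out.println(term.toString());
            }
            ts.end();                            // records the final offset state
        }                                        // try-with-resources closes the stream
    }
}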
Example 1: assertCollation
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
    stream1.reset();
    stream2.reset();
    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
}
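Two details worth noting in this helper: each stream must be reset() before its first incrementToken() call, and wrapping both sides of the assertion in Integer.signum compares only the sign of the collation result, so the test does not depend on the magnitude that compareTo happens to return.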
Example 2: after
import org.apache.lucene.analysis.TokenStream; // import the required package/class
@After
public void after() {
    if (analyzer != null) {
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if (i % 7 == 0) {
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Example 3: splitStringIntoTerms
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private String[] splitStringIntoTerms(String value) {
    try {
        List<String> results = new ArrayList<>();
        try (TokenStream tokens = analyzer.tokenStream("", value)) {
            CharTermAttribute term = tokens.getAttribute(CharTermAttribute.class);
            tokens.reset();
            while (tokens.incrementToken()) {
                String t = term.toString().trim();
                if (t.length() > 0) {
                    results.add(t);
                }
            }
        }
        return results.toArray(new String[results.size()]);
    } catch (IOException e) {
        throw new MemgraphException("Could not tokenize string: " + value, e);
    }
}
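One caveat: getAttribute(CharTermAttribute.class) throws an IllegalArgumentException if the analyzer's chain did not register that attribute, whereas the addAttribute(...) used in Example 1 creates it on demand; addAttribute is therefore the more defensive choice when the analyzer is configurable.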
Example 4: PrefixAwareTokenFilter
import org.apache.lucene.analysis.TokenStream; // import the required package/class
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;

    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);

    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
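The design here is straightforward: the filter registers the same six attributes on itself and on the prefix stream, keeping the p_-prefixed references so that, while the prefix stream is being drained, its term, position increment, payload, offset, type and flags values can be copied into the output attributes before the filter switches over to the suffix stream.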
Example 5: getFilter
import org.apache.lucene.analysis.TokenStream; // import the required package/class
@Override
public Filter getFilter(Element e) throws ParserException {
    List<BytesRef> terms = new ArrayList<>();
    String text = DOMUtils.getNonBlankTextOrFail(e);
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream(fieldName, text);
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        while (ts.incrementToken()) {
            termAtt.fillBytesRef();
            terms.add(BytesRef.deepCopyOf(bytes));
        }
        ts.end();
    } catch (IOException ioe) {
        throw new RuntimeException("Error constructing terms from index:" + ioe);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
    return new TermsFilter(fieldName, terms);
}
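The getBytesRef()/fillBytesRef() pair is the Lucene 4.x idiom for TermToBytesRefAttribute: the attribute hands out a reusable BytesRef once, and fillBytesRef() refreshes its contents for each token, which is why BytesRef.deepCopyOf is needed before storing it. In later Lucene versions fillBytesRef() was removed and getBytesRef() itself returns the current term's bytes.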
Example 6: findGoodEndForNoHighlightExcerpt
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents)
        throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
Example 7: lemmatize
import org.apache.lucene.analysis.TokenStream; // import the required package/class
protected String lemmatize(String query) {
    // ItalianAnalyzer stems Italian text; the field name is irrelevant for this use.
    ItalianAnalyzer analyzer = new ItalianAnalyzer();
    StringBuilder sb = new StringBuilder();
    try (TokenStream tokenStream = analyzer.tokenStream("label", query)) {
        // addAttribute is safe even if the attribute is already on the stream.
        CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        e.printStackTrace();
    }
    analyzer.close();
    return sb.toString();
}
Example 8: Field
import org.apache.lucene.analysis.TokenStream; // import the required package/class
/**
 * Create field with TokenStream value.
 * @param name field name
 * @param tokenStream TokenStream value
 * @param type field type
 * @throws IllegalArgumentException if either the name or type
 *         is null, or if the field's type is stored(), or
 *         if tokenized() is false, or if indexed() is false.
 * @throws NullPointerException if the tokenStream is null
 */
public Field(String name, TokenStream tokenStream, FieldType type) {
    if (name == null) {
        throw new IllegalArgumentException("name cannot be null");
    }
    if (tokenStream == null) {
        throw new NullPointerException("tokenStream cannot be null");
    }
    if (!type.indexed() || !type.tokenized()) {
        throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized");
    }
    if (type.stored()) {
        throw new IllegalArgumentException("TokenStream fields cannot be stored");
    }

    this.name = name;
    this.fieldsData = null;
    this.tokenStream = tokenStream;
    this.type = type;
}
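As a follow-up, here is a hedged sketch of how such a constructor is typically called: build a FieldType that is indexed, tokenized and unstored, then hand the Field a pre-built TokenStream. The setIndexed(...) setter matches the Lucene 4.x-era API that the indexed() check above implies, and the analyzer argument is a placeholder:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

public class PreAnalyzedFieldDemo {
    // Sketch only: builds a Document around a TokenStream-valued Field.
    static Document build(Analyzer analyzer, String text) throws IOException {
        FieldType type = new FieldType();
        type.setIndexed(true);    // required: TokenStream fields must be indexed (Lucene 4.x setter)
        type.setTokenized(true);  // required: TokenStream fields must be tokenized
        type.setStored(false);    // required: TokenStream fields cannot be stored
        type.freeze();
        Document doc = new Document();
        doc.add(new Field("body", analyzer.tokenStream("body", text), type));
        return doc;
    }
}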
Example 9: parseQueryString
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    int count = 0;
    try (TokenStream source = analyzer.tokenStream(field, queryString)) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // copy the term's chars; toBytesRef() encodes them as UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
            count++;
        }
    }
    if (count == 0) {
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
Example 10: analyze
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);

    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
Example 11: Lucene43CompoundWordTokenFilterBase
import org.apache.lucene.analysis.TokenStream; // import the required package/class
protected Lucene43CompoundWordTokenFilterBase(TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
    super(input);
    this.tokens = new LinkedList<>();
    if (minWordSize < 0) {
        throw new IllegalArgumentException("minWordSize cannot be negative");
    }
    this.minWordSize = minWordSize;
    if (minSubwordSize < 0) {
        throw new IllegalArgumentException("minSubwordSize cannot be negative");
    }
    this.minSubwordSize = minSubwordSize;
    if (maxSubwordSize < 0) {
        throw new IllegalArgumentException("maxSubwordSize cannot be negative");
    }
    this.maxSubwordSize = maxSubwordSize;
    this.onlyLongestMatch = onlyLongestMatch;
    this.dictionary = dictionary;
}
Example 12: extractExtendedAttributes
import org.apache.lucene.analysis.TokenStream; // import the required package/class
/**
 * Extracts the remaining (non-core) attributes from the stream,
 * grouped by attribute class name.
 *
 * @param stream current TokenStream
 * @param includeAttributes attribute names to keep; null or empty means keep all
 * @return a map of attribute key to value
 */
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
    final Map<String, Object> extendedAttributes = new TreeMap<>();

    stream.reflectWith((attClass, key, value) -> {
        // Skip the core attributes; they are reported elsewhere.
        if (CharTermAttribute.class.isAssignableFrom(attClass)) {
            return;
        }
        if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) {
            return;
        }
        if (OffsetAttribute.class.isAssignableFrom(attClass)) {
            return;
        }
        if (TypeAttribute.class.isAssignableFrom(attClass)) {
            return;
        }
        if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
            if (value instanceof BytesRef) {
                final BytesRef p = (BytesRef) value;
                value = p.toString();
            }
            extendedAttributes.put(key, value);
        }
    });

    return extendedAttributes;
}
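reflectWith(...) visits every (attribute class, key, value) triple registered on the stream, which is what makes this generic extraction possible; the lambda compiles because the AttributeReflector callback interface has a single reflect method.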
Example 13: AlternateSpellingFilter
import org.apache.lucene.analysis.TokenStream; // import the required package/class
public AlternateSpellingFilter(TokenStream tokenStream) {
    super(tokenStream);
    this.previousTokens = new ArrayList<String>();
    this.alternateSpellings = new HashMap<String, String>();
    // Register the Hebrew word היא as an alternate spelling of הוא.
    this.alternateSpellings.put("היא", "הוא");
}
Example 14: createStackedTokenStream
import org.apache.lucene.analysis.TokenStream; // import the required package/class
private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) {
    Reader reader = new FastStringReader(source);
    for (CharFilterFactory charFilterFactory : charFilterFactories) {
        reader = charFilterFactory.create(reader);
    }
    Tokenizer tokenizer = tokenizerFactory.create();
    tokenizer.setReader(reader);
    TokenStream tokenStream = tokenizer;
    for (int i = 0; i < current; i++) {
        tokenStream = tokenFilterFactories[i].create(tokenStream);
    }
    return tokenStream;
}
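For readers who want the same stacking idea with concrete classes rather than factories, here is a minimal hedged sketch; WhitespaceTokenizer's no-arg constructor and this LowerCaseFilter package location assume roughly Lucene 5.x/6.x:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StackedChainDemo {
    public static void main(String[] args) throws IOException {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("Hello TokenStream World"));
        // Stack one concrete filter on the tokenizer, mirroring the loop above.
        TokenStream stream = new LowerCaseFilter(tokenizer);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term.toString());  // hello / tokenstream / world
        }
        stream.end();
        stream.close();
    }
}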
Example 15: testMetaphoneWords
import org.apache.lucene.analysis.TokenStream; // import the required package/class
@Test
public void testMetaphoneWords() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.myStemmer.type", "br_metaphone")
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());
    TokenFilterFactory filterFactory = analysisService.tokenFilter("br_metaphone");

    Tokenizer tokenizer = new KeywordTokenizer();
    Map<String, String> words = buildWordList();
    Set<String> inputWords = words.keySet();
    for (String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(words.get(word)));
        ts.close();
    }
}
Author: anaelcarvalho | Project: elasticsearch-analysis-metaphone_ptBR | Lines of code: 29 | Source file: MetaphoneTokenFilterTests.java