This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.reset. If you are wondering what TokenStream.reset does, how to call it, or want to see it used in context, the curated examples below should help. You can also read further about the containing class, org.apache.lucene.analysis.TokenStream.
Fifteen code examples of TokenStream.reset are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java examples.
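All of the examples follow the same TokenStream lifecycle: obtain the stream, register attributes, call reset() before the first incrementToken(), call end() after the last token, then close(). The minimal sketch below illustrates that contract on its own before the real-world examples; the class name TokenStreamResetDemo, the field name, and the sample text are illustrative only, and it assumes a Lucene version (5.x or later) where StandardAnalyzer takes no constructor arguments.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamResetDemo {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream stream = analyzer.tokenStream("field", "some sample text")) {
            // Attributes must be registered before consuming the stream.
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                   // mandatory before the first incrementToken()
            while (stream.incrementToken()) { // advance to the next token
                System.out.println(term.toString());
            }
            stream.end();                     // finalize offset/position state
        }                                     // try-with-resources closes stream and analyzer
    }
}

Skipping reset() leaves many tokenizers in an inconsistent state; Lucene's test tokenizers (such as the MockTokenizer used in several examples below) are written to fail fast when the reset()/incrementToken()/end()/close() contract is violated.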
Example 1: analyze
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
 *  because really the caller who called {@link Analyzer#tokenStream} should close it,
 *  but when trying that there are recursion issues when we try to use the same
 *  TokenStream twice in the same recursion... */
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    int numTokens = 0;
    boolean success = false;
    try {
        stream.reset();
        consumer.reset(stream);
        while (stream.incrementToken()) {
            consumer.nextToken();
            numTokens++;
        }
        consumer.end();
        success = true; // only reached when the stream was fully consumed without error
    } finally {
        if (success) {
            stream.close();
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    return numTokens;
}
Example 2: testSimple
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };
    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));
    assertThat(test.incrementToken(), equalTo(false));
}
Example 3: analyze
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);
    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
Example 4: assertCollation
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
    stream1.reset();
    stream2.reset();
    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
}
Example 5: lemmatize
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
protected String lemmatize(String query) {
    ItalianAnalyzer analyzer = new ItalianAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("label", query);
    StringBuilder sb = new StringBuilder();
    CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
    try {
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();   // release the stream once fully consumed
        tokenStream.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return sb.toString();
}
Example 6: after
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
@After
public void after() {
    if (analyzer != null) {
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if (i % 7 == 0) {
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Example 7: splitByTokenizer
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<>();
    try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            if (termAtt.length() > 0) {
                tokList.add(termAtt.toString());
            }
        }
    } finally {
        reader.close();
    }
    return tokList;
}
Example 8: getFilter
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
@Override
public Filter getFilter(Element e) throws ParserException {
    List<BytesRef> terms = new ArrayList<>();
    String text = DOMUtils.getNonBlankTextOrFail(e);
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream(fieldName, text);
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        while (ts.incrementToken()) {
            termAtt.fillBytesRef();
            terms.add(BytesRef.deepCopyOf(bytes));
        }
        ts.end();
    } catch (IOException ioe) {
        throw new RuntimeException("Error constructing terms from index:" + ioe);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
    return new TermsFilter(fieldName, terms);
}
Example 9: termsFromTokenStream
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();
    return outputTemp.toArray(new String[0]);
}
Author: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines: 14, Source: SimilarityParser.java
Example 10: testMetaphoneWords
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
@Test
public void testMetaphoneWords() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.myStemmer.type", "br_metaphone")
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());
    TokenFilterFactory filterFactory = analysisService.tokenFilter("br_metaphone");
    Tokenizer tokenizer = new KeywordTokenizer();
    Map<String, String> words = buildWordList();
    Set<String> inputWords = words.keySet();
    for (String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);
        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(words.get(word)));
        ts.close();
    }
}
Author: anaelcarvalho, Project: elasticsearch-analysis-metaphone_ptBR, Lines: 29, Source: MetaphoneTokenFilterTests.java
Example 11: analyze
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition,
                    lastOffset + offset.startOffset(), lastOffset + offset.endOffset(),
                    posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
Example 12: testSimple
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };
    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));
    assertThat(test.incrementToken(), equalTo(false));
}
Example 13: match
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private void match(String analyzerName, String source, String target) throws IOException {
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    TokenStream stream = AllTokenStream.allTokenStream("_all", source, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    StringBuilder sb = new StringBuilder();
    while (stream.incrementToken()) {
        sb.append(termAtt.toString()).append(" ");
    }
    MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
}
Example 14: lemmatize
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
public static String lemmatize(String query) {
    StringBuilder sb = new StringBuilder();
    ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_44);
    TokenStream tokenStream;
    try {
        tokenStream = analyzer.tokenStream("label", query);
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        analyzer.close();
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        sb = new StringBuilder();
        sb.append(query);
    }
    return sb.toString();
}
Example 15: testAnalyzer
import org.apache.lucene.analysis.TokenStream; // import the package/class required by this method
private void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisStempelPlugin());
    Analyzer analyzer = analysis.indexAnalyzers.get("polish").analyzer();
    TokenStream ts = analyzer.tokenStream("test", source);
    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    for (String expected : expected_terms) {
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(expected));
    }
}