Java TokenStream.reset Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.reset. If you are wondering what TokenStream.reset does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.


The sections below present 15 code examples of the TokenStream.reset method, sorted by popularity.
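Before the individual examples, here is a minimal end-to-end sketch of the lifecycle every snippet below follows: obtain a stream, call reset() exactly once, loop over incrementToken(), then call end() and close(). The StandardAnalyzer, field name, and sample text are illustrative choices, not taken from any of the projects below.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamResetDemo {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) {
            // The stream returned by tokenStream() is not yet usable:
            // reset() must be called once before the first incrementToken().
            try (TokenStream stream = analyzer.tokenStream("body", "Hello token stream world")) {
                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {   // advance to the next token
                    System.out.println(term);
                }
                stream.end();                       // record the final offset state
            }                                       // try-with-resources closes the stream
        }
    }
}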

Example 1: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
 *  because really the caller who called {@link Analyzer#tokenStream} should close it,
 *  but when trying that there are recursion issues when we try to use the same
 *  TokenStream twice in the same recursion... */
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    int numTokens = 0;
    boolean success = false;
    try {
        stream.reset();
        consumer.reset(stream);
        while (stream.incrementToken()) {
            consumer.nextToken();
            numTokens++;
        }
        consumer.end();
        success = true; // mark success so the stream is closed normally in the finally block
    } finally {
        if (success) {
            stream.close();
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    return numTokens;
}
 
Source: baidu/Elasticsearch, SuggestUtils.java
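For readers outside Elasticsearch, here is a hypothetical standalone variant of the same success/finally pattern, with the internal TokenConsumer left out; the method name and signature are assumptions, not part of the original file.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.IOUtils;

// Hypothetical helper: count tokens, closing the stream normally on success
// and quietly on failure, as in the snippet above.
public static int countTokens(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream(field, text);
    boolean success = false;
    int numTokens = 0;
    try {
        stream.reset();
        while (stream.incrementToken()) {
            numTokens++;
        }
        stream.end();
        success = true;
    } finally {
        if (success) {
            stream.close();
        } else {
            // suppress secondary exceptions so the original one propagates
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    return numTokens;
}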

Example 2: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Source: justor/elasticsearch_my, UniqueTokenFilterTests.java

Example 3: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        terms.add(termAtt.toString());
    }
    stream.end();
    stream.close();
    return terms;
}
 
Source: justor/elasticsearch_my, CompoundAnalysisTests.java

Example 4: assertCollation

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
 
Source: justor/elasticsearch_my, SimpleIcuCollationTokenFilterTests.java

Example 5: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected String lemmatize(String query) {
    ItalianAnalyzer analyzer = new ItalianAnalyzer();
    StringBuilder sb = new StringBuilder();

    // try-with-resources closes the TokenStream even if analysis fails
    try (TokenStream tokenStream = analyzer.tokenStream("label", query)) {
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        e.printStackTrace();
    }

    return sb.toString();
}
 
Source: teamdigitale/ontonethub, AbstractIndexingJob.java

Example 6: after

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@After
public void after(){

    if(analyzer != null){
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if(i % 7 == 0){
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 
Source: followwwind/apache, AnalyzerTest.java

Example 7: splitByTokenizer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException{
  StringReader reader = new StringReader( source );
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()){
      if( termAtt.length() > 0 )
        tokList.add( termAtt.toString() );
    }
  } finally{
    reader.close();
  }
  return tokList;
}
 
Source: lamsfoundation/lams, SlowSynonymFilterFactory.java
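The same splitting loop can be sketched without a TokenizerFactory by driving a concrete tokenizer directly. WhitespaceTokenizer is an illustrative assumption here (the original resolves its tokenizer via the loadTokenizer helper, and the no-argument constructor assumes Lucene 5 or later); this variant also ends and closes the stream, which the original leaves to the factory-owned reader.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical stand-alone variant of splitByTokenizer:
public static List<String> splitOnWhitespace(String source) throws IOException {
    List<String> tokens = new ArrayList<>();
    try (WhitespaceTokenizer ts = new WhitespaceTokenizer()) {
        ts.setReader(new StringReader(source));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            if (termAtt.length() > 0) {
                tokens.add(termAtt.toString());
            }
        }
        ts.end();
    }
    return tokens;
}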

Example 8: getFilter

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  }
  catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index:" + ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
 
Source: lamsfoundation/lams, TermsFilterBuilder.java

Example 9: termsFromTokenStream

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();

    return outputTemp.toArray(new String[0]);
}
 
Source: sebastian-hofstaetter/ir-generalized-translation-models, SimilarityParser.java
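A plausible call site for this helper; the analyzer, field name, and text are placeholders, not part of the original file. Since termsFromTokenStream ends and closes the stream itself, the caller must not reuse it.

// Hypothetical usage wrapper:
String[] terms(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("body", text);
    return termsFromTokenStream(stream); // consumes, ends, and closes the stream
}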

Example 10: testMetaphoneWords

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Test
public void testMetaphoneWords() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.myStemmer.type", "br_metaphone")
            .build();

    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());

    TokenFilterFactory filterFactory = analysisService.tokenFilter("br_metaphone");

    Tokenizer tokenizer = new KeywordTokenizer();
    
    Map<String,String> words = buildWordList();
    
    Set<String> inputWords = words.keySet();
    for(String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);

        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(words.get(word)));
        ts.close();
    }
}
 
Source: anaelcarvalho/elasticsearch-analysis-metaphone_ptBR, MetaphoneTokenFilterTests.java

Example 11: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Source: justor/elasticsearch_my, TransportAnalyzeAction.java

Example 12: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Source: justor/elasticsearch_my, TruncateTokenFilterTests.java

Example 13: match

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void match(String analyzerName, String source, String target) throws IOException {
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    TokenStream stream = AllTokenStream.allTokenStream("_all", source, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    StringBuilder sb = new StringBuilder();
    while (stream.incrementToken()) {
        sb.append(termAtt.toString()).append(" ");
    }

    MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
}
 
Source: justor/elasticsearch_my, SynonymsAnalysisTests.java

Example 14: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static String lemmatize(String query) {
    StringBuilder sb = new StringBuilder();

    ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_44);
    // try-with-resources closes the TokenStream even if analysis fails
    try (TokenStream tokenStream = analyzer.tokenStream("label", query)) {
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        // fall back to returning the raw query on analysis failure
        sb = new StringBuilder();
        sb.append(query);
    } finally {
        analyzer.close();
    }

    return sb.toString();
}
 
Source: teamdigitale/ontonethub, JerseyUtils.java

Example 15: testAnalyzer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisStempelPlugin());

    Analyzer analyzer = analysis.indexAnalyzers.get("polish").analyzer();

    TokenStream ts = analyzer.tokenStream("test", source);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();

    for (String expected : expected_terms) {
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(expected));
    }
}
 
Source: justor/elasticsearch_my, SimplePolishTokenFilterTests.java


Note: The org.apache.lucene.analysis.TokenStream.reset examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from community-contributed open-source projects, and copyright in the code remains with the original authors; consult each project's license before using or redistributing it, and do not republish without permission.