

Java TokenStream.reset Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.reset. If you have been wondering how TokenStream.reset is used in practice, how to call it, or what real-world examples look like, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.


The sections below present 15 code examples of the TokenStream.reset method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
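
Before the individual examples, the snippet below is a minimal, self-contained sketch of the lifecycle contract every example follows: obtain a TokenStream from an Analyzer, call reset() exactly once before the first incrementToken(), consume tokens through attributes, then call end() and close(). The class name TokenStreamLifecycle, the choice of StandardAnalyzer, and the field/text values are illustrative assumptions for this article, not code from any of the projects cited below.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamLifecycle {

    /** Collects the terms the analyzer produces for the given text. */
    public static List<String> tokenize(Analyzer analyzer, String field, String text) throws IOException {
        List<String> tokens = new ArrayList<>();
        // try-with-resources guarantees close() even when an exception is thrown
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                   // mandatory before the first incrementToken()
            while (stream.incrementToken()) { // advances to the next token; false at end of stream
                tokens.add(term.toString());
            }
            stream.end();                     // records the end-of-stream offset/position state
        }
        return tokens;
    }

    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) {
            // prints [hello, token, stream, world]: StandardAnalyzer lower-cases and splits the input
            System.out.println(tokenize(analyzer, "body", "Hello token stream world"));
        }
    }
}

Most of the examples below are variations of this loop; a few of them, mostly tests, skip end() or close().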

Example 1: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
 *  because really the caller who called {@link Analyzer#tokenStream} should close it,
 *  but when trying that there are recursion issues when we try to use the same
 *  TokenStream twice in the same recursion... */
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    int numTokens = 0;
    boolean success = false;
    try {
        stream.reset();
        consumer.reset(stream);
        while (stream.incrementToken()) {
            consumer.nextToken();
            numTokens++;
        }
        consumer.end();
        success = true; // set the flag so the stream is closed normally in finally
    } finally {
        if (success) {
            stream.close();
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    return numTokens;
}
 
Developer: baidu, Project: Elasticsearch, Lines: 25, Source: SuggestUtils.java

Example 2: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 24, Source: UniqueTokenFilterTests.java

Example 3: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);

    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 26, Source: CompoundAnalysisTests.java

Example 4: assertCollation

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 20, Source: SimpleIcuCollationTokenFilterTests.java

Example 5: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected String lemmatize(String query) {
	ItalianAnalyzer analyzer = new ItalianAnalyzer();
	TokenStream tokenStream = analyzer.tokenStream("label", query);
	
	
	StringBuilder sb = new StringBuilder();
	CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
	try {
    	tokenStream.reset();
		while (tokenStream.incrementToken()) {
		    if (sb.length() > 0) {
		        sb.append(" ");
		    }
		    sb.append(token.toString());
		}
	} catch (IOException e) {
		// tokenization failed: report it and return whatever has been lemmatized so far
		e.printStackTrace();
	}
	
	return sb.toString();
}
 
Developer: teamdigitale, Project: ontonethub, Lines: 23, Source: AbstractIndexingJob.java

Example 6: after

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@After
public void after(){

    if(analyzer != null){
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if(i % 7 == 0){
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 
Developer: followwwind, Project: apache, Lines: 24, Source: AnalyzerTest.java

Example 7: splitByTokenizer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException{
  StringReader reader = new StringReader( source );
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()){
      if( termAtt.length() > 0 )
        tokList.add( termAtt.toString() );
    }
  } finally{
    reader.close();
  }
  return tokList;
}
 
Developer: lamsfoundation, Project: lams, Lines: 17, Source: SlowSynonymFilterFactory.java

Example 8: getFilter

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
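      // fillBytesRef() (Lucene 4.x API) copies the current term into the shared 'bytes' ref obtained above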
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  }
  catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index:" + ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
 
Developer: lamsfoundation, Project: lams, Lines: 26, Source: TermsFilterBuilder.java

Example 9: termsFromTokenStream

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();

    return outputTemp.toArray(new String[0]);
}
 
Developer: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines: 14, Source: SimilarityParser.java

Example 10: testMetaphoneWords

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Test
public void testMetaphoneWords() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.myStemmer.type", "br_metaphone")
            .build();

    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());

    TokenFilterFactory filterFactory = analysisService.tokenFilter("br_metaphone");

    Tokenizer tokenizer = new KeywordTokenizer();
    
    Map<String,String> words = buildWordList();
    
    Set<String> inputWords = words.keySet();
    for(String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);

        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(words.get(word)));
        ts.close();
    }
}
 
Developer: anaelcarvalho, Project: elasticsearch-analysis-metaphone_ptBR, Lines: 29, Source: MetaphoneTokenFilterTests.java

Example 11: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
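        // after end(), the offset/position-increment attributes expose the final end-of-stream state read below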
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 32, Source: TransportAnalyzeAction.java

Example 12: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 30, Source: TruncateTokenFilterTests.java

Example 13: match

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void match(String analyzerName, String source, String target) throws IOException {
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    TokenStream stream = AllTokenStream.allTokenStream("_all", source, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    StringBuilder sb = new StringBuilder();
    while (stream.incrementToken()) {
        sb.append(termAtt.toString()).append(" ");
    }

    MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 15, Source: SynonymsAnalysisTests.java

Example 14: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static String lemmatize(String query) {
   	StringBuilder sb = new StringBuilder();
   	
	ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_44);
	TokenStream tokenStream;
	try {
		tokenStream = analyzer.tokenStream("label", query);
		
		CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
		
    	tokenStream.reset();
		while (tokenStream.incrementToken()) {
		    if (sb.length() > 0) {
		        sb.append(" ");
		    }
		    sb.append(token.toString());
		}
		
		analyzer.close();
	} catch (IOException e) {
		log.error(e.getMessage(), e);
		sb = new StringBuilder();
		sb.append(query);
	}
	
	
	return sb.toString();
}
 
Developer: teamdigitale, Project: ontonethub, Lines: 29, Source: JerseyUtils.java

Example 15: testAnalyzer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void testAnalyzer(String source, String... expected_terms) throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisStempelPlugin());

    Analyzer analyzer = analysis.indexAnalyzers.get("polish").analyzer();

    TokenStream ts = analyzer.tokenStream("test", source);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();

    for (String expected : expected_terms) {
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(expected));
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 16, Source: SimplePolishTokenFilterTests.java


Note: The org.apache.lucene.analysis.TokenStream.reset method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many programmers; copyright of the source code remains with the original authors. Refer to the license of the corresponding project before distributing or using the code; do not reproduce without permission.