

Java TokenStream.addAttribute Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.addAttribute, drawn from open-source projects. If you are unsure what TokenStream.addAttribute does or how to use it in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.


The sections below show 15 code examples of the TokenStream.addAttribute method, sorted by popularity by default.
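
Before the project examples, here is a minimal self-contained sketch of the canonical addAttribute workflow: acquire the attributes, reset(), loop over incrementToken(), then end() and close(). It assumes WhitespaceAnalyzer from Lucene's analysis-common module (no-argument constructor, Lucene 5+); it is an illustration, not code from any of the projects below.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class AddAttributeSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer();
        TokenStream ts = analyzer.tokenStream("field", "hello token stream");
        // addAttribute returns the stream's shared instance of the requested
        // attribute class, registering a new one only if none exists yet.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();                    // mandatory before the first incrementToken()
        while (ts.incrementToken()) {  // each call overwrites the attribute values in place
            System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
        }
        ts.end();    // records end-of-stream offset state
        ts.close();
        analyzer.close();
    }
}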

Example 1: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Author: justor, Project: elasticsearch_my, Lines: 24, Source: UniqueTokenFilterTests.java

Example 2: assertOffsets

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
static private void assertOffsets(String inputStr, TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
        // Note: the stream is expected to have been reset() by the caller before this loop.
        while (tokenStream.incrementToken()) {
            int start = offsetAttr.startOffset();
            int end = offsetAttr.endOffset();
            termList.add(inputStr.substring(start, end));
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        assertTrue(false);
    }
}
 
Author: BuddhistDigitalResourceCenter, Project: lucene-bo, Lines: 17, Source: TibetanAnalyzerTest.java

Example 3: testToken

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());

    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream ts = filterFactory.create(tokenizer);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertThat(ts.incrementToken(), equalTo(true));

    assertThat(term1.toString(), equalTo(expected));
}
 
Author: justor, Project: elasticsearch_my, Lines: 20, Source: SimplePolishTokenFilterTests.java

Example 4: assertCollation

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
 
Author: justor, Project: elasticsearch_my, Lines: 20, Source: SimpleIcuCollationTokenFilterTests.java
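
For context, the two streams this helper compares typically come from a collation-aware analyzer that emits one collation-key token per input. A hypothetical call, assuming Lucene's ICU module and its ICUCollationKeyAnalyzer (the setup below is illustrative, not taken from the test class above):

// Primary strength ignores case (and accent) differences, so the collation
// keys for "FOO" and "foo" are identical and the streams compare as equal.
Collator collator = Collator.getInstance(ULocale.ENGLISH);
collator.setStrength(Collator.PRIMARY);
Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
assertCollation(analyzer.tokenStream("field", "FOO"),
        analyzer.tokenStream("field", "foo"), 0);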

Example 5: after

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@After
public void after(){

    if(analyzer != null){
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if(i % 7 == 0){
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 
Author: followwwind, Project: apache, Lines: 24, Source: AnalyzerTest.java

Example 6: splitByTokenizer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
  StringReader reader = new StringReader(source);
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      if (termAtt.length() > 0) {
        tokList.add(termAtt.toString());
      }
    }
  } finally {
    reader.close();
  }
  return tokList;
}
 
Author: lamsfoundation, Project: lams, Lines: 17, Source: SlowSynonymFilterFactory.java
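
A hypothetical call site for this helper, assuming WhitespaceTokenizerFactory from Lucene's analysis-common module (its constructor takes a mutable args map; the input text is made up):

TokenizerFactory tokFactory = new WhitespaceTokenizerFactory(new HashMap<String, String>());
List<String> toks = splitByTokenizer("big apple pie", tokFactory);
// toks -> [big, apple, pie]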

Example 7: PrefixAwareTokenFilter

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;
  
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
 
Author: lamsfoundation, Project: lams, Lines: 21, Source: PrefixAwareTokenFilter.java
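
The constructor registers the same six attribute classes on the filter itself and on the prefix stream, so token state can later be copied across one attribute at a time. A minimal sketch of that copying step using the fields above (an illustration of the idea, not the actual Lucene implementation):

// Pull one token from the prefix stream and mirror its state into
// this filter's own attributes.
private boolean copyPrefixToken() throws IOException {
  if (!prefix.incrementToken()) {
    prefixExhausted = true;
    return false;
  }
  termAtt.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
  posIncrAtt.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
  payloadAtt.setPayload(p_payloadAtt.getPayload());
  offsetAtt.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
  typeAtt.setType(p_typeAtt.type());
  flagsAtt.setFlags(p_flagsAtt.getFlags());
  return true;
}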

Example 8: assertTokenStream

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
static private void assertTokenStream(TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        // Note: the stream is expected to have been reset() by the caller before this loop.
        while (tokenStream.incrementToken()) {
            termList.add(charTermAttribute.toString());
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        assertTrue(false);
    }
}
 
Author: BuddhistDigitalResourceCenter, Project: lucene-bo, Lines: 14, Source: TibetanAnalyzerTest.java

Example 9: termsFromTokenStream

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();

    return outputTemp.toArray(new String[0]);
}
 
Author: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines: 14, Source: SimilarityApiParser.java

Example 10: testMetaphonePhrases

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Test
public void testMetaphonePhrases() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myAnalyzer.filter", "br_metaphone")
            .build();

    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());

    Analyzer analyzer = analysisService.analyzer("myAnalyzer");

    Map<String, List<String>> phrases = buildPhraseList();

    for (String phrase : phrases.keySet()) {
        List<String> outputWords = phrases.get(phrase);

        TokenStream ts = analyzer.tokenStream("test", phrase);

        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();

        for (String expected : outputWords) {
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(expected));
        }
        ts.close();
    }
}
 
Author: anaelcarvalho, Project: elasticsearch-analysis-metaphone_ptBR, Lines: 33, Source: MetaphoneTokenFilterTests.java

Example 11: analysisResult

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Returns the tokenization result of the given analyzer for the given text.
 *
 * @param analyzer the analyzer to use
 * @param keyWord  the text to analyze
 * @throws Exception
 */
private static List<String> analysisResult(Analyzer analyzer, String keyWord)
        throws Exception {
    TokenStream tokenStream = analyzer.tokenStream("content",
            new StringReader(keyWord));
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    List<String> stringList = new ArrayList<String>();
    tokenStream.reset(); // reset() is required before the first incrementToken() call
    while (tokenStream.incrementToken()) {
        stringList.add(charTermAttribute.toString());
    }
    tokenStream.end();
    tokenStream.close();
    return stringList;
}
 
Author: NeilRen, Project: NEILREN4J, Lines: 22, Source: IKAnalyzerService.java

Example 12: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
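        // After end(), the attributes expose end-of-stream state; fold it into the
        // running offsets/positions so the next value of a multi-valued field
        // continues from where this one left off.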
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Author: justor, Project: elasticsearch_my, Lines: 32, Source: TransportAnalyzeAction.java

Example 13: testSimple

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new TruncateTokenFilter(t, 3));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "a bb ccc dddd eeeee");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("a"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("bb"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ccc"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("ddd"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("eee"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Author: justor, Project: elasticsearch_my, Lines: 30, Source: TruncateTokenFilterTests.java

Example 14: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Author: baidu, Project: Elasticsearch, Lines: 31, Source: TransportAnalyzeAction.java

Example 15: reset

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public void reset(TokenStream stream) {
    charTermAttr = stream.addAttribute(CharTermAttribute.class);
    posIncAttr = stream.addAttribute(PositionIncrementAttribute.class);
    offsetAttr = stream.addAttribute(OffsetAttribute.class);
}
 
Author: justor, Project: elasticsearch_my, Lines: 6, Source: DirectCandidateGenerator.java
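
One property this reset helper leans on: addAttribute is idempotent per stream, so asking for an attribute class that is already registered returns the same shared instance rather than creating a new one. A small illustrative check (hypothetical fragment; stream is any TokenStream):

CharTermAttribute first = stream.addAttribute(CharTermAttribute.class);
CharTermAttribute second = stream.addAttribute(CharTermAttribute.class);
assert first == second; // one shared instance per attribute class and stream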


Note: the org.apache.lucene.analysis.TokenStream.addAttribute examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects by various contributors, and copyright of the source code remains with the original authors; consult each project's License before redistributing or reusing the code, and do not reproduce this compilation without permission.