

Java CharTermAttribute Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.analysis.tokenattributes.CharTermAttribute, gathered from open-source projects. If you are unsure what CharTermAttribute is for or how to use it, the curated examples below should help.


CharTermAttribute belongs to the org.apache.lucene.analysis.tokenattributes package. The 15 code examples below are drawn from open-source projects and are ordered by popularity.
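
Almost all of the examples below share one canonical pattern: request the CharTermAttribute from the stream once, then re-read it after every successful incrementToken() call. Here is a minimal, self-contained sketch of that pattern (it assumes a recent Lucene release where StandardAnalyzer has a no-argument constructor; the field name "field" and the sample text are arbitrary):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CharTermAttributeDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("field", "Hello Lucene World")) {
            // Request the attribute before consuming; Lucene updates it in place.
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                              // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term.toString()); // prints: hello / lucene / world
            }
            ts.end();                                // finalize end-of-stream state
        }                                            // try-with-resources closes the stream
    }
}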

Example 1: main

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");
    //System.out.println(parse1);

    String text11 = "ZW321282050000000325";

    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        //System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        //System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.close();
}
 
Developer: dimensoft, Project: improved-journey, Lines: 27, Source: TestAnsj.java

Example 2: assertTokenStream

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());

        if(expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }

        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);

        if(expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }

        if(expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }

        index++;
    }
    tokenStream.end();
}
 
Developer: open-korean-text, Project: elasticsearch-analysis-openkoreantext, Lines: 25, Source: TokenStreamAssertions.java
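
A hypothetical JUnit call site for this helper might look as follows (WhitespaceAnalyzer, the field name, and the expected offsets are our illustrative assumptions; the helper calls reset() and end() itself, so the caller only has to close the stream):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.junit.Test;

public class AssertTokenStreamUsageTest {
    @Test
    public void splitsOnWhitespace() throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("field", "hello world")) {
            TokenStreamAssertions.assertTokenStream(
                    ts,
                    new String[] { "hello", "world" }, // expected terms
                    null,                              // skip the type assertions
                    new int[] { 0, 6 },                // expected start offsets
                    new int[] { 5, 11 });              // expected end offsets
        }
    }
}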

Example 3: splitStringIntoTerms

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private String[] splitStringIntoTerms(String value) {
    try {
        List<String> results = new ArrayList<>();
        try (TokenStream tokens = analyzer.tokenStream("", value)) {
            CharTermAttribute term = tokens.getAttribute(CharTermAttribute.class);
            tokens.reset();
            while (tokens.incrementToken()) {
                String t = term.toString().trim();
                if (t.length() > 0) {
                    results.add(t);
                }
            }
        }
        return results.toArray(new String[results.size()]);
    } catch (IOException e) {
        throw new MemgraphException("Could not tokenize string: " + value, e);
    }
}
 
Developer: mware-solutions, Project: memory-graph, Lines: 19, Source: ElasticsearchSearchQueryBase.java

Example 4: parse

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");

    try {
        try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
            List<TokenData> result = new ArrayList<>();
            CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                String original = text.substring(offset.startOffset(), offset.endOffset());
                result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
            }
            ts.end();

            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
 
Developer: EvidentSolutions, Project: elasticsearch-analysis-voikko, Lines: 23, Source: VoikkoTokenFilterTests.java

Example 5: parseQueryString

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field, Analyzer analyzer,
                                     String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }

    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 19, Source: CommonTermsQueryBuilder.java
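
The terse // UTF-8 comment above refers to BytesRefBuilder.copyChars, which re-encodes the term's UTF-16 chars as UTF-8 bytes before they are wrapped in a Term. A minimal standalone illustration (the demo class name is ours):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

public class BytesRefDemo {
    public static void main(String[] args) {
        BytesRefBuilder builder = new BytesRefBuilder();
        builder.copyChars("中文");         // re-encodes the chars as UTF-8
        BytesRef bytes = builder.toBytesRef();
        System.out.println(bytes.length);  // 6: two CJK chars, three bytes each
    }
}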

Example 6: testSimple

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };

    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));

    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));

    assertThat(test.incrementToken(), equalTo(false));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 24, Source: UniqueTokenFilterTests.java

Example 7: analyze

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();

    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);

    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        String tokText = termAtt.toString();
        terms.add(tokText);
    }
    return terms;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 26, Source: CompoundAnalysisTests.java

Example 8: createComponents

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new Tokenizer() {
        boolean incremented = false;
        CharTermAttribute term = addAttribute(CharTermAttribute.class);

        @Override
        public boolean incrementToken() throws IOException {
            if (incremented) {
                return false;
            }
            term.setLength(0).append(output);
            incremented = true;
            return true;
        }
    };
    return new TokenStreamComponents(tokenizer);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 19, Source: DocumentFieldMapperTests.java

Example 9: testToken

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());

    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream ts = filterFactory.create(tokenizer);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertThat(ts.incrementToken(), equalTo(true));

    assertThat(term1.toString(), equalTo(expected));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 20, Source: SimplePolishTokenFilterTests.java

Example 10: assertCollation

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 20, Source: SimpleIcuCollationTokenFilterTests.java

Example 11: analyzeString

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
/**
 * Analyzes a string the same way the given field would be analyzed at query time.
 * @param core the SolrCore whose schema defines the field
 * @param field the name of the field
 * @param value the string to analyze
 * @return the analyzed string
 */
public static String analyzeString(SolrCore core, String field, String value) {
    try {
        StringBuilder b = new StringBuilder();
        try (TokenStream ts = core.getLatestSchema().getFieldType(field).getQueryAnalyzer().tokenStream(field, new StringReader(value))) {
            ts.reset();
            while (ts.incrementToken()) {
                b.append(" ");
                CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class);
                b.append(attr);
            }
        }

        return b.toString().trim();
    } catch (IOException e) {
        //FIXME: This error should be properly logged!
        e.printStackTrace();
        return value;
    }
}
 
Developer: RBMHTechnology, Project: vind, Lines: 26, Source: FieldAnalyzerService.java

Example 12: after

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
@After
public void after(){

    if(analyzer != null){
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if(i % 7 == 0){
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 
Developer: followwwind, Project: apache, Lines: 24, Source: AnalyzerTest.java

Example 13: splitByTokenizer

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
  StringReader reader = new StringReader(source);
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      if (termAtt.length() > 0)
        tokList.add(termAtt.toString());
    }
    ts.end();
  } finally {
    ts.close();      // also releases the tokenizer's input
    reader.close();
  }
  return tokList;
}
 
Developer: lamsfoundation, Project: lams, Lines: 17, Source: SlowSynonymFilterFactory.java

Example 14: accept

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
@Override
public boolean accept(AttributeSource source) {
  if (termAtt == null) {
    termAtt = source.addAttribute(CharTermAttribute.class);
  }
  try {
    Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date
    if (date != null) {
      return true;
    }
  } catch (ParseException e) {
    // Not parseable as a date: fall through and reject the token.
  }
  
  return false;
}
 
Developer: lamsfoundation, Project: lams, Lines: 17, Source: DateRecognizerSinkFilter.java
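
Since accept() needs nothing more than an AttributeSource carrying a CharTermAttribute, the filter can be exercised directly, without wiring up a full TeeSinkTokenFilter chain. A minimal sketch, assuming the Lucene 4.x-era sinks API this project builds against and an illustrative MM/dd/yyyy date format:

import java.text.SimpleDateFormat;
import java.util.Locale;
import org.apache.lucene.analysis.sinks.DateRecognizerSinkFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;

public class DateRecognizerDemo {
    public static void main(String[] args) {
        DateRecognizerSinkFilter filter =
                new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
        AttributeSource source = new AttributeSource();
        CharTermAttribute term = source.addAttribute(CharTermAttribute.class);

        term.setEmpty().append("01/02/2015");
        System.out.println(filter.accept(source)); // true: the term parses as a date

        term.setEmpty().append("hello");
        System.out.println(filter.accept(source)); // false: ParseException, token rejected
    }
}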

Example 15: PrefixAwareTokenFilter

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;
  
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
 
Developer: lamsfoundation, Project: lams, Lines: 21, Source: PrefixAwareTokenFilter.java


Note: The org.apache.lucene.analysis.tokenattributes.CharTermAttribute examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from community open-source projects; copyright remains with the original authors, so consult each project's license before redistributing or reusing them. Do not repost without permission.