This article collects typical usage examples of the Java class org.apache.lucene.analysis.tokenattributes.CharTermAttribute. If you are wondering what CharTermAttribute is for and how to use it, the curated examples below should help.
The CharTermAttribute class belongs to the org.apache.lucene.analysis.tokenattributes package. Fifteen code examples are shown below, ordered by popularity.
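Before the examples, here is the canonical consumption pattern for CharTermAttribute. This is a minimal sketch, assuming Lucene's StandardAnalyzer is on the classpath; the field name "body" and the sample text are placeholders:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CharTermAttributeDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("body", "Hello, Lucene world")) {
            // Register (or fetch) the attribute before reset(); the same instance
            // is updated in place on every incrementToken() call.
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                   // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(termAtt.toString()); // prints "hello", "lucene", "world"
            }
            ts.end();                     // records end-of-stream state (final offset)
        }                                 // close() is handled by try-with-resources
    }
}

Every example below is a variation of this reset / incrementToken / end / close life cycle.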
Example 1: main

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");
    //System.out.println(parse1);
    String text11 = "ZW321282050000000325";
    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        //System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        //System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.end(); // per the TokenStream contract, call end() before close()
    tokenizer.close();
}
Example 2: assertTokenStream

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }
        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }
        index++;
    }
    tokenStream.end();
}
Developer: open-korean-text | Project: elasticsearch-analysis-openkoreantext | Lines: 25 | Source file: TokenStreamAssertions.java
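A hedged usage sketch for the helper above, assuming Lucene's WhitespaceAnalyzer is available (pass null for any expectation you do not want checked):

TokenStream ts = new WhitespaceAnalyzer().tokenStream("field", "hello world");
// "hello" spans offsets 0-5 and "world" spans 6-11; token types are not checked here.
assertTokenStream(ts, new String[]{"hello", "world"}, null, new int[]{0, 6}, new int[]{5, 11});
ts.close();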
Example 3: splitStringIntoTerms

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private String[] splitStringIntoTerms(String value) {
    try {
        List<String> results = new ArrayList<>();
        try (TokenStream tokens = analyzer.tokenStream("", value)) {
            CharTermAttribute term = tokens.getAttribute(CharTermAttribute.class);
            tokens.reset();
            while (tokens.incrementToken()) {
                String t = term.toString().trim();
                if (t.length() > 0) {
                    results.add(t);
                }
            }
        }
        return results.toArray(new String[results.size()]);
    } catch (IOException e) {
        throw new MemgraphException("Could not tokenize string: " + value, e);
    }
}
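The method relies on an analyzer field of the enclosing class (and MemgraphException is a project-specific exception type). A hedged sketch of how it might be wired and called, assuming any Lucene Analyzer will do:

private final Analyzer analyzer = new StandardAnalyzer(); // assumption: the project may use a different Analyzer

String[] terms = splitStringIntoTerms("The Quick Brown Fox");
// With StandardAnalyzer this yields lowercased terms such as "quick", "brown", "fox";
// whether "the" survives depends on the analyzer's stopword configuration.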
Example 4: parse

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");
    try {
        try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
            List<TokenData> result = new ArrayList<>();
            CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // Recover the original surface form from the offsets.
                String original = text.substring(offset.startOffset(), offset.endOffset());
                result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
            }
            ts.end();
            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 5: parseQueryString

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field, Analyzer analyzer,
        String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // copyChars re-encodes the term's chars as UTF-8 bytes; the builder is reused across tokens
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
Example 6: testSimple

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

public void testSimple() throws IOException {
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(t, new UniqueTokenFilter(t));
        }
    };
    TokenStream test = analyzer.tokenStream("test", "this test with test");
    test.reset();
    CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class);
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("this"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("test"));
    assertThat(test.incrementToken(), equalTo(true));
    assertThat(termAttribute.toString(), equalTo("with"));
    // UniqueTokenFilter suppresses the duplicate "test", so the stream is exhausted here.
    assertThat(test.incrementToken(), equalTo(false));
    test.end();
    test.close();
}
Example 7: analyze

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
    AnalysisModule analysisModule = new AnalysisModule(new Environment(settings), singletonList(new AnalysisPlugin() {
        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("myfilter", MyFilterTokenFilterFactory::new);
        }
    }));
    IndexAnalyzers indexAnalyzers = analysisModule.getAnalysisRegistry().build(idxSettings);
    Analyzer analyzer = indexAnalyzers.get(analyzerName).analyzer();
    AllEntries allEntries = new AllEntries();
    allEntries.addText("field1", text, 1.0f);
    TokenStream stream = AllTokenStream.allTokenStream("_all", text, 1.0f, analyzer);
    stream.reset();
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    List<String> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        terms.add(termAtt.toString());
    }
    return terms;
}
Example 8: createComponents

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // 'output' is a field of the enclosing class; the tokenizer ignores its input
    // and emits it as a single token.
    Tokenizer tokenizer = new Tokenizer() {
        boolean incremented = false;
        CharTermAttribute term = addAttribute(CharTermAttribute.class);

        @Override
        public boolean incrementToken() throws IOException {
            if (incremented) {
                return false;
            }
            term.setLength(0).append(output);
            incremented = true;
            return true;
        }
    };
    return new TokenStreamComponents(tokenizer);
}
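The anonymous tokenizer above is single-use, since incremented is never cleared. For context, a self-contained variant of the same idea (the class name FixedTokenAnalyzer is hypothetical), with a reset() override so the analyzer can be reused:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// An Analyzer that ignores its input and always emits exactly one fixed token.
final class FixedTokenAnalyzer extends Analyzer {
    private final String output;

    FixedTokenAnalyzer(String output) {
        this.output = output;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new Tokenizer() {
            boolean incremented = false;
            final CharTermAttribute term = addAttribute(CharTermAttribute.class);

            @Override
            public boolean incrementToken() {
                if (incremented) {
                    return false;
                }
                clearAttributes();                 // reset all attributes to defaults first
                term.setLength(0).append(output);  // overwrite the term buffer with the fixed token
                incremented = true;
                return true;
            }

            @Override
            public void reset() throws IOException {
                super.reset();
                incremented = false;               // allow reuse across tokenStream() calls
            }
        };
        return new TokenStreamComponents(tokenizer);
    }
}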
Example 9: testToken

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());
    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");
    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream ts = filterFactory.create(tokenizer);
    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertThat(ts.incrementToken(), equalTo(true));
    assertThat(term1.toString(), equalTo(expected));
}
Example 10: assertCollation

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
    stream1.reset();
    stream2.reset();
    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
}
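A hedged usage sketch, assuming the Lucene ICU module (lucene-analyzers-icu): ICUCollationKeyAnalyzer emits a single sort-key token per input, so comparing the two single-token streams compares collation order. Two analyzer instances are used because an Analyzer reuses its token stream components per thread; the exact term encoding varies across Lucene versions, but for plain English input the expected sign is the same:

Collator collator = Collator.getInstance(ULocale.ENGLISH);
Analyzer a1 = new ICUCollationKeyAnalyzer(collator);
Analyzer a2 = new ICUCollationKeyAnalyzer(collator);
// "apple" sorts before "banana", so the expected comparison is negative.
assertCollation(a1.tokenStream("f", "apple"), a2.tokenStream("f", "banana"), -1);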
Example 11: analyzeString

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

/**
 * Analyzes a string with the query analyzer of the given field.
 * @param core the SolrCore whose schema defines the field
 * @param field the name of the field
 * @param value the string to analyze
 * @return the analyzed string, with tokens joined by single spaces
 */
public static String analyzeString(SolrCore core, String field, String value) {
    try {
        StringBuilder b = new StringBuilder();
        try (TokenStream ts = core.getLatestSchema().getFieldType(field).getQueryAnalyzer().tokenStream(field, new StringReader(value))) {
            CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class); // same instance on every token
            ts.reset();
            while (ts.incrementToken()) {
                b.append(" ");
                b.append(attr);
            }
        }
        return b.toString().trim();
    } catch (IOException e) {
        //FIXME: this error should be properly logged!
        e.printStackTrace();
        return value;
    }
}
Example 12: after

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

@After
public void after() {
    if (analyzer != null) {
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if (i % 7 == 0) { // line break after every 7 tokens
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Example 13: splitByTokenizer

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException {
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    List<String> tokList = new ArrayList<>();
    try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            if (termAtt.length() > 0) {
                tokList.add(termAtt.toString());
            }
        }
    } finally {
        reader.close();
    }
    return tokList;
}
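loadTokenizer is a private helper of the enclosing class (it instantiates the tokenizer from the factory and attaches the reader). A hedged sketch of a call, assuming Lucene's SPI lookup via TokenizerFactory.forName; depending on the Lucene version, the args map may also need a "luceneMatchVersion" entry:

TokenizerFactory whitespaceFactory = TokenizerFactory.forName("whitespace", new HashMap<String, String>());
List<String> tokens = splitByTokenizer("a b  c", whitespaceFactory); // ["a", "b", "c"]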
Example 14: accept

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

@Override
public boolean accept(AttributeSource source) {
    if (termAtt == null) {
        termAtt = source.addAttribute(CharTermAttribute.class);
    }
    try {
        // We don't care about the Date value itself, only that the term parses as one.
        Date date = dateFormat.parse(termAtt.toString());
        if (date != null) {
            return true;
        }
    } catch (ParseException e) {
        // not parseable as a date; fall through and reject the token
    }
    return false;
}
Example 15: PrefixAwareTokenFilter

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; // import the required package/class

public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    // Attributes on this filter (which shares its attribute source with the suffix stream)...
    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);
    // ...and the corresponding attributes on the prefix stream, copied over while it is consumed.
    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
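A hedged usage sketch: the filter emits every token from prefix first, then every token from suffix, shifting the suffix's offsets past the prefix's last end offset. Two separate analyzer instances are used because an Analyzer reuses its token stream components per thread; this also assumes a Lucene version where PrefixAwareTokenFilter.reset() resets both wrapped streams:

TokenStream prefix = new WhitespaceAnalyzer().tokenStream("f", "hello");
TokenStream suffix = new WhitespaceAnalyzer().tokenStream("f", "world");
try (TokenStream joined = new PrefixAwareTokenFilter(prefix, suffix)) {
    CharTermAttribute term = joined.getAttribute(CharTermAttribute.class);
    joined.reset();
    while (joined.incrementToken()) {
        System.out.println(term.toString()); // prints "hello", then "world"
    }
    joined.end();
}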