This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.getAttribute. If you are wondering what TokenStream.getAttribute does and how to call it, the curated examples below should help; you can also explore further usages of its enclosing class, org.apache.lucene.analysis.TokenStream.
Seven code examples of TokenStream.getAttribute follow, ordered by popularity by default.
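Before the examples, it helps to recall the canonical TokenStream consumption pattern that all of them follow: fetch attribute views, reset(), loop on incrementToken(), then end() and close(). The sketch below is illustrative only; the analyzer and field name are arbitrary placeholders, not taken from any of the examples.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class GetAttributeDemo {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) {
            TokenStream stream = analyzer.tokenStream("content", "Hello token streams");
            // getAttribute returns the stream's existing view of the attribute
            // (it throws IllegalArgumentException if the attribute is absent;
            // addAttribute would create it instead). The same instance is
            // updated in place on every incrementToken() call.
            CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
            stream.reset();                      // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString());
            }
            stream.end();                        // records end-of-stream state (final offset)
            stream.close();                      // releases resources held by the stream
        }
    }
}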
Example 1: assertTokenStream
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }
        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }
        index++;
    }
    tokenStream.end();
}
Author: open-korean-text | Project: elasticsearch-analysis-openkoreantext | Source: TokenStreamAssertions.java
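A hypothetical call to this helper (the analyzer choice and expected values below are illustrative, not taken from the project's tests) could look like:

Analyzer analyzer = new StandardAnalyzer();
TokenStream stream = analyzer.tokenStream("field", "Hello World");
assertTokenStream(stream,
        new String[]{"hello", "world"},            // expected terms (StandardAnalyzer lowercases)
        new String[]{"<ALPHANUM>", "<ALPHANUM>"},  // expected token types
        new int[]{0, 6},                           // expected start offsets
        new int[]{5, 11});                         // expected end offsets
analyzer.close();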
Example 2: analyzeMultitermTerm
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
    if (analyzerIn == null) analyzerIn = getAnalyzer();

    TokenStream source = null;
    try {
        source = analyzerIn.tokenStream(field, part);
        source.reset();
        // Pre-6.x Lucene API: fetch the bytes view once, then fill it per token.
        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken())
            throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
        termAtt.fillBytesRef();
        if (source.incrementToken())
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
        source.end();
        return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}
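The fillBytesRef() call marks this as pre-6.x Lucene code (newer versions return up-to-date bytes from getBytesRef() directly). A hedged sketch of how a parser subclass might use it to normalize both endpoints of a range query, assuming a field named "title" and a lowercasing analyzer:

BytesRef lower = analyzeMultitermTerm("title", "Alpha", null);  // null falls back to getAnalyzer()
BytesRef upper = analyzeMultitermTerm("title", "Omega", null);
// Both endpoints are now "alpha" / "omega", consistent with how the
// indexed terms were analyzed.
Query range = new TermRangeQuery("title", lower, upper, true, true);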
Example 3: lemmatize
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected String lemmatize(String query) {
    ItalianAnalyzer analyzer = new ItalianAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("label", query);
    StringBuilder sb = new StringBuilder();
    CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
    try {
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        e.printStackTrace();  // analysis failed; return whatever was collected so far
    } finally {
        IOUtils.closeWhileHandlingException(tokenStream);
        analyzer.close();
    }
    return sb.toString();
}
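A possible call site; the input string is illustrative, and the exact output depends on the ItalianAnalyzer's stop-word list and stemmer:

String normalized = lemmatize("le città più belle");
// stop words such as "le" and "più" are dropped, and the remaining
// terms are reduced to their stemmed forms before being re-joined.
System.out.println(normalized);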
Example 4: analysisResult
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Returns the tokens produced by the given analyzer for the given keyword.
 *
 * @param analyzer the analyzer to run
 * @param keyWord  the text to tokenize
 * @throws Exception if analysis fails
 */
private static List<String> analysisResult(Analyzer analyzer, String keyWord)
        throws Exception {
    TokenStream tokenStream = analyzer.tokenStream("content",
            new StringReader(keyWord));
    // addAttribute and getAttribute return the same instance, so the
    // attribute view can be fetched once, outside the loop.
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    List<String> stringList = new ArrayList<String>();
    tokenStream.reset();  // required before the first incrementToken()
    while (tokenStream.incrementToken()) {
        stringList.add(charTermAttribute.toString());
    }
    tokenStream.end();
    tokenStream.close();
    return stringList;
}
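A hedged usage sketch, assuming a concrete analyzer such as StandardAnalyzer:

List<String> tokens = analysisResult(new StandardAnalyzer(), "Lucene in Action");
System.out.println(tokens);  // e.g. [lucene, action] if "in" is on the stop-word list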
Example 5: assertSimpleTSOutput
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static void assertSimpleTSOutput(TokenStream stream,
        String[] expected) throws IOException {
    stream.reset();
    CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
    assertThat(termAttr, notNullValue());
    int i = 0;
    while (stream.incrementToken()) {
        assertThat(expected.length, greaterThan(i));
        assertThat("expected different term at index " + i, expected[i++], equalTo(termAttr.toString()));
    }
    assertThat("not all tokens produced", i, equalTo(expected.length));
}
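For illustration, the helper could be driven as below; a WhitespaceAnalyzer keeps the expected terms easy to predict (depending on the Lucene version, its constructor may require a Version argument):

Analyzer analyzer = new WhitespaceAnalyzer();
TokenStream stream = analyzer.tokenStream("f", "red green blue");
assertSimpleTSOutput(stream, new String[]{"red", "green", "blue"});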
Example 6: lemmatize
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static String lemmatize(String query) {
    StringBuilder sb = new StringBuilder();
    ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_44);
    TokenStream tokenStream = null;
    try {
        tokenStream = analyzer.tokenStream("label", query);
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            if (sb.length() > 0) {
                sb.append(" ");
            }
            sb.append(token.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        log.error(e.getMessage(), e);
        sb = new StringBuilder();
        sb.append(query);  // fall back to the raw query on analysis failure
    } finally {
        IOUtils.closeWhileHandlingException(tokenStream);
        analyzer.close();
    }
    return sb.toString();
}
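Since TokenStream and Analyzer both implement Closeable, the explicit finally block above can be replaced with try-with-resources on Lucene versions where ItalianAnalyzer has a no-argument constructor. A minimal sketch, assuming the same field name:

StringBuilder sb = new StringBuilder();
try (ItalianAnalyzer analyzer = new ItalianAnalyzer();
     TokenStream tokenStream = analyzer.tokenStream("label", query)) {
    CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
        if (sb.length() > 0) {
            sb.append(" ");
        }
        sb.append(token.toString());
    }
    tokenStream.end();
}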
Example 7: analyzeSingleChunk
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
* Returns the analyzed form for the given chunk
*
* If the analyzer produces more than one output token from the given chunk,
* a ParseException is thrown.
*
* @param field The target field
* @param termStr The full term from which the given chunk is excerpted
* @param chunk The portion of the given termStr to be analyzed
* @return The result of analyzing the given chunk
* @throws ParseException when analysis returns other than one output token
*/
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException {
    String analyzed = null;
    TokenStream stream = null;
    try {
        stream = getAnalyzer().tokenStream(field, chunk);
        stream.reset();
        CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
        // get first and hopefully only output token
        if (stream.incrementToken()) {
            analyzed = termAtt.toString();

            // try to increment again, there should only be one output token
            StringBuilder multipleOutputs = null;
            while (stream.incrementToken()) {
                if (null == multipleOutputs) {
                    multipleOutputs = new StringBuilder();
                    multipleOutputs.append('"');
                    multipleOutputs.append(analyzed);
                    multipleOutputs.append('"');
                }
                multipleOutputs.append(',');
                multipleOutputs.append('"');
                multipleOutputs.append(termAtt.toString());
                multipleOutputs.append('"');
            }
            stream.end();
            if (null != multipleOutputs) {
                throw new ParseException(
                        String.format(getLocale(),
                                "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
            }
        } else {
            // nothing returned by analyzer. Was it a stop word and the user accidentally
            // used an analyzer with stop words?
            stream.end();
            throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
        }
    } catch (IOException e) {
        throw new ParseException(
                String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
    return analyzed;
}
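As a hedged illustration of the contract this method enforces (assuming the parser's analyzer is a StandardAnalyzer): a chunk that analyzes to exactly one token is returned in normalized form, while a multi-token chunk raises ParseException:

String one = analyzeSingleChunk("body", "Hello*", "Hello");   // returns "hello"
// analyzeSingleChunk("body", "New York*", "New York");
// would throw ParseException: Analyzer created multiple terms for "New York"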