This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.end. If you have been wondering exactly what TokenStream.end does, how to call it, or what real-world usages look like, the curated method examples below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.
The sections below show 12 code examples of the TokenStream.end method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
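Before the examples, here is a minimal sketch of the canonical TokenStream consumption lifecycle that every example below follows: reset() before the first incrementToken(), end() after the last token has been consumed, then close(). The analyzer choice (StandardAnalyzer with its no-argument constructor, available in Lucene 5.x and later), the field name "body", and the sample text are illustrative assumptions, not taken from any of the examples.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamEndSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // "body" and the sample text are placeholder values for this sketch
        try (TokenStream ts = analyzer.tokenStream("body", "Hello TokenStream end")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                    // required before the first incrementToken()
            while (ts.incrementToken()) {  // consume every token
                System.out.println(term.toString());
            }
            ts.end();                      // signals end of input so final offset state is set
        }                                  // try-with-resources closes the stream
        analyzer.close();
    }
}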
Example 1: assertTokenStream
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }
        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }
        index++;
    }
    tokenStream.end();
}
Author ID: open-korean-text, Project: elasticsearch-analysis-openkoreantext, Lines of code: 25, Source file: TokenStreamAssertions.java
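A hypothetical invocation of the assertTokenStream helper above might look like the following; the WhitespaceAnalyzer, field name, input text, and expected offsets are assumptions for illustration only.

// Hypothetical usage; analyzer, field name, text and expected values are assumptions.
Analyzer analyzer = new WhitespaceAnalyzer();
TokenStream ts = analyzer.tokenStream("field", "hello world");
assertTokenStream(ts,
        new String[] { "hello", "world" },  // expected terms
        null,                               // skip type assertions
        new int[] { 0, 6 },                 // expected start offsets
        new int[] { 5, 11 });               // expected end offsets
ts.close();  // the helper calls end() but leaves closing to the caller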
Example 2: assertCollation
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
    stream1.reset();
    stream2.reset();
    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));
    stream1.end();
    stream2.end();
    stream1.close();
    stream2.close();
}
Example 3: after
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@After
public void after() {
    if (analyzer != null) {
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if (i % 7 == 0) {
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Example 4: getFilter
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Filter getFilter(Element e) throws ParserException {
    List<BytesRef> terms = new ArrayList<>();
    String text = DOMUtils.getNonBlankTextOrFail(e);
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream(fieldName, text);
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        while (ts.incrementToken()) {
            termAtt.fillBytesRef();
            terms.add(BytesRef.deepCopyOf(bytes));
        }
        ts.end();
    } catch (IOException ioe) {
        throw new RuntimeException("Error constructing terms from index:" + ioe);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
    return new TermsFilter(fieldName, terms);
}
Example 5: analyzeMultitermTerm
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
    if (analyzerIn == null) analyzerIn = getAnalyzer();

    TokenStream source = null;
    try {
        source = analyzerIn.tokenStream(field, part);
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        if (!source.incrementToken())
            throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
        termAtt.fillBytesRef();
        if (source.incrementToken())
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
        source.end();
        return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}
Example 6: analyze
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
Example 7: getSpanQuery
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
    String value = DOMUtils.getNonBlankTextOrFail(e);

    List<SpanQuery> clausesList = new ArrayList<>();

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream(fieldName, value);
        TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();
        ts.reset();
        while (ts.incrementToken()) {
            termAtt.fillBytesRef();
            SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
            clausesList.add(stq);
        }
        ts.end();
        SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
        soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
        return soq;
    } catch (IOException ioe) {
        throw new ParserException("IOException parsing value:" + value);
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
Example 8: analyze
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
Example 9: termsFromTokenStream
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();
    return outputTemp.toArray(new String[0]);
}
Author ID: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines of code: 14, Source file: SimilarityApiParser.java
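A possible call site for termsFromTokenStream, sketched under the assumption that the surrounding class holds an Analyzer field named analyzer; the field name and sample text are placeholders. Note that the helper itself resets, consumes, ends, and closes the stream, so the caller only creates it.

// Hypothetical caller; "body" and the sample text are placeholder assumptions.
TokenStream stream = analyzer.tokenStream("body", "lucene token stream end");
String[] terms = termsFromTokenStream(stream);  // helper handles reset(), end() and close()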
Example 10: analyzeSingleChunk
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Returns the analyzed form for the given chunk.
 *
 * If the analyzer produces more than one output token from the given chunk,
 * a ParseException is thrown.
 *
 * @param field The target field
 * @param termStr The full term from which the given chunk is excerpted
 * @param chunk The portion of the given termStr to be analyzed
 * @return The result of analyzing the given chunk
 * @throws ParseException when analysis returns other than one output token
 */
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException {
    String analyzed = null;
    TokenStream stream = null;
    try {
        stream = getAnalyzer().tokenStream(field, chunk);
        stream.reset();
        CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
        // get first and hopefully only output token
        if (stream.incrementToken()) {
            analyzed = termAtt.toString();

            // try to increment again, there should only be one output token
            StringBuilder multipleOutputs = null;
            while (stream.incrementToken()) {
                if (null == multipleOutputs) {
                    multipleOutputs = new StringBuilder();
                    multipleOutputs.append('"');
                    multipleOutputs.append(analyzed);
                    multipleOutputs.append('"');
                }
                multipleOutputs.append(',');
                multipleOutputs.append('"');
                multipleOutputs.append(termAtt.toString());
                multipleOutputs.append('"');
            }
            stream.end();
            if (null != multipleOutputs) {
                throw new ParseException(
                    String.format(getLocale(),
                        "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
            }
        } else {
            // nothing returned by analyzer. Was it a stop word and the user accidentally
            // used an analyzer with stop words?
            stream.end();
            throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
        }
    } catch (IOException e) {
        throw new ParseException(
            String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
    return analyzed;
}
Example 11: getQuery
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Query getQuery(Element e) throws ParserException {
    String fieldsList = e.getAttribute("fieldNames"); // a comma-delimited list of fields
    String[] fields = defaultFieldNames;
    if ((fieldsList != null) && (fieldsList.trim().length() > 0)) {
        fields = fieldsList.trim().split(",");
        // trim the field names
        for (int i = 0; i < fields.length; i++) {
            fields[i] = fields[i].trim();
        }
    }

    // Parse any "stopWords" attribute
    // TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
    // I use all analyzers/fields to generate multi-field compatible stop list
    String stopWords = e.getAttribute("stopWords");
    Set<String> stopWordsSet = null;
    if ((stopWords != null) && (fields != null)) {
        stopWordsSet = new HashSet<>();
        for (String field : fields) {
            TokenStream ts = null;
            try {
                ts = analyzer.tokenStream(field, stopWords);
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    stopWordsSet.add(termAtt.toString());
                }
                ts.end();
            } catch (IOException ioe) {
                throw new ParserException("IoException parsing stop words list in "
                    + getClass().getName() + ":" + ioe.getLocalizedMessage());
            } finally {
                IOUtils.closeWhileHandlingException(ts);
            }
        }
    }

    MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.getText(e), fields, analyzer, fields[0]);
    mlt.setMaxQueryTerms(DOMUtils.getAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS));
    mlt.setMinTermFrequency(DOMUtils.getAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY));
    mlt.setPercentTermsToMatch(DOMUtils.getAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100);
    mlt.setStopWords(stopWordsSet);
    int minDocFreq = DOMUtils.getAttribute(e, "minDocFreq", -1);
    if (minDocFreq >= 0) {
        mlt.setMinDocFreq(minDocFreq);
    }
    mlt.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return mlt;
}
Example 12: addTermWeights
import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Adds term weights found by tokenizing text from reader into the Map words.
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName)
        throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use RelevancyFeedback without " +
            "term vectors, you must provide an Analyzer");
    }

    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }

            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}