This article collects typical usage examples of the Java method org.apache.lucene.analysis.tokenattributes.OffsetAttribute.endOffset. If you are wondering what OffsetAttribute.endOffset does, how to call it, or what real-world uses look like, the curated code examples below should help. You can also explore further usage examples of the containing class, org.apache.lucene.analysis.tokenattributes.OffsetAttribute.
The following presents 15 code examples of the OffsetAttribute.endOffset method, sorted by popularity by default.
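Before diving into the collected examples, here is a minimal, self-contained sketch of how endOffset is typically read from a TokenStream. The StandardAnalyzer, field name, and sample text are illustrative assumptions, not taken from the examples below.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class EndOffsetDemo {
    public static void main(String[] args) throws IOException {
        // Analyzer, field name and text are placeholders for illustration only.
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("body", "hello offset world")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                // endOffset() is one past the last character of the token in the original text
                System.out.println(term + " -> [" + offset.startOffset() + ", " + offset.endOffset() + ")");
            }
            stream.end();
        }
    }
}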
Example 1: findGoodEndForNoHighlightExcerpt
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents)
        throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
Example 2: assertOffsets
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private static void assertOffsets(String inputStr, TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        // CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
        while (tokenStream.incrementToken()) {
            int start = offsetAttr.startOffset();
            int end = offsetAttr.endOffset();
            termList.add(inputStr.substring(start, end));
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        assertTrue(false);
    }
}
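A hypothetical JUnit-style invocation of this helper might look like the following; the WhitespaceAnalyzer, field name, and expected token list are assumptions made for illustration, and note that the helper expects the caller to reset the stream first.

// Hypothetical usage of assertOffsets; analyzer, field name and expectations are illustrative.
@Test
public void whitespaceTokensKeepTheirOffsets() throws IOException {
    String input = "foo bar";
    try (Analyzer analyzer = new WhitespaceAnalyzer();
         TokenStream stream = analyzer.tokenStream("field", input)) {
        stream.reset(); // assertOffsets iterates the stream but does not reset it itself
        assertOffsets(input, stream, Arrays.asList("foo", "bar"));
    }
}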
Example 3: findGoodEndForNoHighlightExcerpt
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
Example 4: displayTokens
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
protected void displayTokens(String text, String elementId) throws IOException {
    if (log.isDebugEnabled()) {
        Analyzer analyzer = getConfiguredAnalyzer();
        StringBuilder sb = new StringBuilder();
        sb.append(elementId).append(": ").append(text).append(": ");
        TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(text));
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            int startOffset = offsetAttribute.startOffset();
            int endOffset = offsetAttribute.endOffset();
            String term = charTermAttribute.toString();
            sb.append("[" + term + "](" + startOffset + "," + endOffset + ") ");
        }
        log.debug(sb);
    }
}
Example 5: walkTokens
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private String[] walkTokens() throws IOException {
    List<String> wordList = new ArrayList<>();
    while (input.incrementToken()) {
        CharTermAttribute textAtt = input.getAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
        char[] buffer = textAtt.buffer();
        // Reconstruct the token text from the term buffer, using the offset span as its length;
        // this assumes the token text is the same length as its offset range in the input.
        String word = new String(buffer, 0, offsetAtt.endOffset() - offsetAtt.startOffset());
        wordList.add(word);
        AttributeSource attrs = input.cloneAttributes();
        tokenAttrs.add(attrs);
    }
    String[] words = new String[wordList.size()];
    for (int i = 0; i < words.length; i++) {
        words[i] = wordList.get(i);
    }
    return words;
}
Example 6: handleTokenStream
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    tokenStream.reset();
    int pos = 0;
    CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
    OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
    TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
    PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);
    while (tokenStream.incrementToken()) {
        if (null == charTermAttribute || null == offsetAttribute) {
            return;
        }
        Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                offsetAttribute.startOffset(), offsetAttribute.endOffset());
        if (null != typeAttribute) {
            token.setType(typeAttribute.type());
        }
        pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
        if (!tokenPosMap.containsKey(pos)) {
            tokenPosMap.put(pos, new LinkedList<Token>());
        }
        tokenPosMap.get(pos).add(token);
    }
    tokenStream.close();
}
Example 7: emit
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private void emit( char[] token ) {
    System.out.println( "emit: " + new String( token ) );
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace( token );
    }
    CharTermAttribute termAttr = getTermAttribute( );
    termAttr.setEmpty( );
    termAttr.append( new StringBuilder( ).append( token ) );
    OffsetAttribute offAttr = getOffsetAttribute( );
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        int start = offAttr.endOffset() - token.length;
        offAttr.setOffset( start, offAttr.endOffset() );
    }
    PositionIncrementAttribute pia = getPositionIncrementAttribute( );
    if (pia != null) {
        pia.setPositionIncrement( ++positionIncr );
    }
    lastEmitted = token;
}
Example 8: emit
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private void emit(char[] tokenChars) {
    char[] token = tokenChars;
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace(token);
    }
    CharTermAttribute termAttr = getTermAttribute();
    if (termAttr != null) {
        termAttr.setEmpty();
        termAttr.append(new StringBuilder().append(token));
    }
    OffsetAttribute offAttr = getOffsetAttribute();
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        int start = offAttr.endOffset() - token.length;
        offAttr.setOffset(start, offAttr.endOffset());
    }
    PositionIncrementAttribute pia = getPositionIncrementAttribute();
    if (pia != null) {
        pia.setPositionIncrement(++positionIncr);
    }
    lastEmitted = token;
}
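The replaceWhiteSpace helper referenced by both emit variants is not part of this excerpt. Below is a minimal sketch of what such a helper could look like, assuming replaceWhitespaceWith is a Character field holding the replacement character; both the field type and the logic are assumptions made for illustration.

// Hypothetical helper, not shown in the original excerpt: substitutes every whitespace
// character in the token with the configured replacement character.
private char[] replaceWhiteSpace(char[] token) {
    char[] replaced = new char[token.length];
    for (int i = 0; i < token.length; i++) {
        replaced[i] = Character.isWhitespace(token[i]) ? replaceWhitespaceWith.charValue() : token[i];
    }
    return replaced;
}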
Example 9: searchSingleWord
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
/**
 * Searches for a single word and updates the ranges.
 *
 * @param tokenizer - The Lucene Tokenizer with the complete text of the chapter.
 * @param searchString - The word to search for.
 * @param currentChapter - The chapter to search in.
 * @param ranges - The ranges of the found words will be added here.
 * @param documentLength - The length of the whole document.
 * @throws IOException
 */
private void searchSingleWord(Tokenizer tokenizer, String searchString, Chapter currentChapter,
        List<Range> ranges, int documentLength) throws IOException {
    // will be incremented
    CharTermAttribute charTermAttrib = tokenizer.getAttribute(CharTermAttribute.class);
    OffsetAttribute offset = tokenizer.getAttribute(OffsetAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        if (charTermAttrib.toString().toLowerCase().matches(searchString.toLowerCase())) {
            int startOffset = offset.startOffset() + currentChapter.getRange().getStart().getOffset();
            int endOffset = offset.endOffset() + currentChapter.getRange().getStart().getOffset();
            ranges.add(new Range(TextPosition.fromGlobalOffset(startOffset, documentLength),
                    TextPosition.fromGlobalOffset(endOffset, documentLength)));
        }
    }
}
Example 10: compare
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
@Override
public int compare(OffsetAttribute offsetA, OffsetAttribute offsetB) {
    int lenA = offsetA.endOffset() - offsetA.startOffset();
    int lenB = offsetB.endOffset() - offsetB.startOffset();
    if (lenA < lenB) {
        return 1;
    } else if (lenA > lenB) {
        return -1;
    // by here, the length is the same
    } else if (offsetA.startOffset() < offsetB.startOffset()) {
        return -1;
    } else if (offsetA.startOffset() > offsetB.startOffset()) {
        return 1;
    }
    return 0;
}
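A minimal sketch of how a comparator like this might be applied; the class name OffsetLengthStartComparator is borrowed from Example 11 below, and the collectedOffsets list is assumed to have been gathered elsewhere.

// Hypothetical usage: order collected OffsetAttribute instances longest-span-first,
// breaking ties by ascending start offset, exactly as the compare method above defines.
List<OffsetAttribute> offsets = new ArrayList<>(collectedOffsets); // collectedOffsets is assumed
Collections.sort(offsets, new OffsetLengthStartComparator());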
Example 11: removeOverlapsAndSort
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
/**
 * @param offsets offsets to process
 * @param comparator initial OffsetLengthStartComparator to use to rule out overlaps
 * @param startComparator comparator for final sort
 * @return sorted list of offsets
 */
public static List<OffsetAttribute> removeOverlapsAndSort(
        List<OffsetAttribute> offsets, OffsetLengthStartComparator comparator,
        OffsetStartComparator startComparator) {
    if (offsets == null || offsets.size() < 2)
        return offsets;
    Collections.sort(offsets, comparator);
    Set<Integer> seen = new HashSet<>();
    List<OffsetAttribute> filtered = new ArrayList<>();
    for (OffsetAttribute offset : offsets) {
        if (!alreadySeen(offset, seen)) {
            filtered.add(offset);
            for (int i = offset.startOffset(); i < offset.endOffset(); i++) {
                seen.add(i);
            }
        }
    }
    if (startComparator != null) {
        Collections.sort(filtered, startComparator);
    }
    return filtered;
}
Example 12: simpleAnalyze
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
    int lastPosition = -1;
    int lastOffset = 0;
    for (String text : request.text()) {
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute type = stream.addAttribute(TypeAttribute.class);
            PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
                if (increment > 0) {
                    lastPosition = lastPosition + increment;
                }
                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                        lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), null));
            }
            stream.end();
            lastOffset += offset.endOffset();
            lastPosition += posIncr.getPositionIncrement();
            lastPosition += analyzer.getPositionIncrementGap(field);
            lastOffset += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return tokens;
}
Example 13: analyze
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                    lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
Example 14: simpleAnalyze
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
    int lastPosition = -1;
    int lastOffset = 0;
    for (String text : request.text()) {
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute type = stream.addAttribute(TypeAttribute.class);
            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
                if (increment > 0) {
                    lastPosition = lastPosition + increment;
                }
                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                        lastOffset + offset.endOffset(), type.type(), null));
            }
            stream.end();
            lastOffset += offset.endOffset();
            lastPosition += posIncr.getPositionIncrement();
            lastPosition += analyzer.getPositionIncrementGap(field);
            lastOffset += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return tokens;
}
Example 15: analyze
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                    lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}