当前位置: 首页>>代码示例>>Java>>正文


Java OffsetAttribute.endOffset方法代码示例

本文整理汇总了Java中org.apache.lucene.analysis.tokenattributes.OffsetAttribute.endOffset方法的典型用法代码示例。如果您正苦于以下问题:Java OffsetAttribute.endOffset方法的具体用法?Java OffsetAttribute.endOffset怎么用?Java OffsetAttribute.endOffset使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.analysis.tokenattributes.OffsetAttribute的用法示例。


在下文中一共展示了OffsetAttribute.endOffset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: findGoodEndForNoHighlightExcerpt

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Finds a good cut point for a no-highlight excerpt: the end offset of the
 * last token that still fits within {@code noMatchSize} characters.
 *
 * @return the chosen end offset, or -1 if no term boundary can be determined
 */
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents)
        throws IOException {
    try (TokenStream stream = analyzer.tokenStream(fieldName, contents)) {
        // Without offsets we cannot cut on a term boundary at all.
        if (!stream.hasAttribute(OffsetAttribute.class)) {
            return -1;
        }
        stream.reset();
        OffsetAttribute offsets = stream.getAttribute(OffsetAttribute.class);
        int lastEnd = -1;
        while (stream.incrementToken()) {
            int tokenEnd = offsets.endOffset();
            if (tokenEnd < noMatchSize) {
                lastEnd = tokenEnd;
                continue;
            }
            // A token ending exactly on the boundary may be included whole;
            // otherwise fall back to the end of the previous token.
            return tokenEnd == noMatchSize ? noMatchSize : lastEnd;
        }
        stream.end();
        // Token stream exhausted: everything fits, so highlight it all.
        return lastEnd;
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:26,代码来源:PlainHighlighter.java

示例2: assertOffsets

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Asserts that the token stream's tokens, re-sliced out of {@code inputStr}
 * by their start/end offsets, equal {@code expected}. Slicing by offsets
 * (rather than reading the term text) verifies the offsets themselves.
 *
 * @param inputStr    the original input the stream was built from
 * @param tokenStream the stream to drain (assumed already reset by the caller)
 * @param expected    the expected substrings, in token order
 */
static private void assertOffsets(String inputStr, TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
        // NOTE(review): assumes the caller has already called reset() — confirm.
        while (tokenStream.incrementToken()) {
            int start = offsetAttr.startOffset();
            int end = offsetAttr.endOffset();
            termList.add(inputStr.substring(start, end));
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        // Fail the test with the underlying cause attached instead of the
        // original assertTrue(false), which discarded all diagnostic detail.
        throw new AssertionError(e);
    }
}
 
开发者ID:BuddhistDigitalResourceCenter,项目名称:lucene-bo,代码行数:17,代码来源:TibetanAnalyzerTest.java

示例3: findGoodEndForNoHighlightExcerpt

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Finds a good cut point for a no-highlight excerpt: the end offset of the
 * last token that still fits within {@code noMatchSize} characters.
 *
 * @return the chosen end offset, or -1 if the analyzer provides no offsets
 */
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                // NOTE(review): tokenStream.end() is skipped on this early return;
                // try-with-resources still closes the stream — confirm acceptable.
                return end;
            }
            // Remember the last token boundary that fit under the limit.
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:25,代码来源:PlainHighlighter.java

示例4: displayTokens

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Logs (at debug level) every token the configured analyzer produces for
 * {@code text}, formatted as {@code [term](start,end)}.
 *
 * @param text      the text to analyze
 * @param elementId an identifier prepended to the log line for context
 * @throws IOException if the token stream fails
 */
protected void displayTokens(String text, String elementId) throws IOException {
	if (log.isDebugEnabled()) {
		Analyzer analyzer = getConfiguredAnalyzer();
		StringBuilder sb = new StringBuilder();
		sb.append(elementId).append(": ").append(text).append(": ");

		// try-with-resources: the token stream must be closed (and end() called)
		// to release analyzer resources — the original version leaked it.
		try (TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(text))) {
			CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
			OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);

			tokenStream.reset();
			while (tokenStream.incrementToken()) {
				int startOffset = offsetAttribute.startOffset();
				int endOffset = offsetAttribute.endOffset();
				String term = charTermAttribute.toString();
				sb.append("[" + term + "](" + startOffset + "," + endOffset + ") ");
			}
			tokenStream.end();
		}

		log.debug(sb);
	}
}
 
开发者ID:TIBCOSoftware,项目名称:jasperreports,代码行数:22,代码来源:LuceneUtil.java

示例5: walkTokens

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Drains the upstream token stream, collecting each token's text and a clone
 * of its full attribute state (appended to {@code tokenAttrs} for replay).
 *
 * @return the token texts, in stream order
 * @throws IOException if the underlying stream fails
 */
private String[] walkTokens() throws IOException {
    List<String> words = new ArrayList<>();
    CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
    while (input.incrementToken()) {
        // NOTE(review): the text length is taken from the offset span rather than
        // termAtt.length(); the two differ when a filter changes the token's
        // length relative to the original text — confirm this is intended.
        int len = offsetAtt.endOffset() - offsetAtt.startOffset();
        words.add(new String(termAtt.buffer(), 0, len));
        // Snapshot every attribute so the token can be reconstructed later.
        tokenAttrs.add(input.cloneAttributes());
    }
    return words.toArray(new String[0]);
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-opennlp,代码行数:18,代码来源:OpenNLPTokenFilter.java

示例6: handleTokenStream

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Consumes {@code tokenStream} and groups the produced tokens by their
 * accumulated position into {@code tokenPosMap} (one list per position, so
 * synonyms/stacked tokens share an entry).
 *
 * @param tokenPosMap destination map: position -> tokens at that position
 * @param tokenStream the stream to consume; closed before this method returns
 * @throws IOException if the stream fails
 */
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    tokenStream.reset();
    int pos = 0;

    CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
    OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
    TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
    PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);

    // Term text and offsets are mandatory to build a Token. Checked once up
    // front (attribute references don't change mid-stream); the original
    // checked inside the loop and leaked the stream on the early return.
    if (null == charTermAttribute || null == offsetAttribute) {
        tokenStream.close();
        return;
    }
    try {
        while (tokenStream.incrementToken()) {
            Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                    offsetAttribute.startOffset(), offsetAttribute.endOffset());
            if (null != typeAttribute) {
                token.setType(typeAttribute.type());
            }
            // Tokens lacking a position-increment attribute advance by 1.
            pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
            if (!tokenPosMap.containsKey(pos)) {
                tokenPosMap.put(pos, new LinkedList<Token>());
            }
            tokenPosMap.get(pos).add(token);
        }
    } finally {
        // Always release the stream, even if incrementToken() throws.
        tokenStream.close();
    }
}
 
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:27,代码来源:MultiLangTokenizer.java

示例7: emit

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Emits {@code token} as the current token: writes the term text, rewrites
 * the offsets to span exactly this token (anchored at the current end
 * offset), and bumps the position increment.
 *
 * @param token the characters of the token to emit
 */
private void emit( char[] token ) {
    // NOTE(review): removed a leftover System.out.println debug trace that
    // printed every emitted token to stdout in production.
    if (replaceWhitespaceWith != null) {
        token = replaceWhiteSpace( token );
    }
    CharTermAttribute termAttr = getTermAttribute( );
    termAttr.setEmpty( );
    termAttr.append( new StringBuilder( ).append( token ) );

    OffsetAttribute offAttr = getOffsetAttribute( );
    // Back-date the start offset so the span covers the emitted token's length.
    if (offAttr != null && offAttr.endOffset() >= token.length) {
        int start = offAttr.endOffset() - token.length;
        offAttr.setOffset( start, offAttr.endOffset() );
    }

    PositionIncrementAttribute pia = getPositionIncrementAttribute( );
    if (pia != null) {
        pia.setPositionIncrement( ++positionIncr );
    }

    lastEmitted = token;
}
 
开发者ID:lucidworks,项目名称:auto-phrase-tokenfilter,代码行数:23,代码来源:AutoPhrasingTokenFilter.java

示例8: emit

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Emits the given characters as the current token: optionally rewrites
 * whitespace, sets the term text, re-anchors the offsets to the token's
 * length, and advances the position increment.
 *
 * @param tokenChars the raw characters of the token to emit
 */
private void emit(char[] tokenChars) {
    // Optionally substitute whitespace inside the phrase before emitting.
    char[] chars = (replaceWhitespaceWith != null) ? replaceWhiteSpace(tokenChars) : tokenChars;

    CharTermAttribute termAttr = getTermAttribute();
    if (termAttr != null) {
        termAttr.setEmpty();
        termAttr.append(new StringBuilder().append(chars));
    }

    OffsetAttribute offAttr = getOffsetAttribute();
    if (offAttr != null && offAttr.endOffset() >= chars.length) {
        // Anchor the span at the current end offset, backing up by the token length.
        offAttr.setOffset(offAttr.endOffset() - chars.length, offAttr.endOffset());
    }

    PositionIncrementAttribute pia = getPositionIncrementAttribute();
    if (pia != null) {
        pia.setPositionIncrement(++positionIncr);
    }

    lastEmitted = chars;
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:22,代码来源:AutoPhrasingTokenFilter.java

示例9: searchSingleWord

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Searches for a single word and updates the ranges.
 *
 * @param tokenizer - The Lucene Tokenizer with the complete text of chapter.
 * @param searchString - The word to search for.
 * @param currentChapter - The chapter to search in.
 * @param ranges - The ranges of the found words will be added here.
 * @param documentLength - The length of the whole document.
 * @throws IOException if the tokenizer fails
 */
private void searchSingleWord(Tokenizer tokenizer, String searchString, Chapter currentChapter,
    List<Range> ranges, int documentLength) throws IOException {
  CharTermAttribute charTermAttrib = tokenizer.getAttribute(CharTermAttribute.class);
  OffsetAttribute offset = tokenizer.getAttribute(OffsetAttribute.class);

  // Hoisted loop invariants: the lowered pattern and the chapter's base offset
  // were recomputed on every token in the original.
  // NOTE(review): searchString is used as a regex via matches(), and a regex is
  // being lower-cased — confirm callers intend regex (not literal) matching.
  String loweredPattern = searchString.toLowerCase();
  int chapterStart = currentChapter.getRange().getStart().getOffset();

  tokenizer.reset();
  while (tokenizer.incrementToken()) {
    if (charTermAttrib.toString().toLowerCase().matches(loweredPattern)) {
      // Translate chapter-local offsets to document-global positions.
      int startOffset = offset.startOffset() + chapterStart;
      int endOffset = offset.endOffset() + chapterStart;

      ranges.add(new Range(TextPosition.fromGlobalOffset(startOffset, documentLength),
          TextPosition.fromGlobalOffset(endOffset, documentLength)));
    }
  }
}
 
开发者ID:vita-us,项目名称:ViTA,代码行数:28,代码来源:Searcher.java

示例10: compare

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Orders offsets longest-first; ties are broken by ascending start offset.
 */
@Override
public int compare(OffsetAttribute offsetA, OffsetAttribute offsetB) {
  int lenA = offsetA.endOffset() - offsetA.startOffset();
  int lenB = offsetB.endOffset() - offsetB.startOffset();

  // Descending by length first (longer spans sort earlier)...
  int byLength = Integer.compare(lenB, lenA);
  if (byLength != 0) {
    return byLength;
  }
  // ...then ascending by start offset for equal lengths.
  return Integer.compare(offsetA.startOffset(), offsetB.startOffset());
}
 
开发者ID:tballison,项目名称:lucene-addons,代码行数:18,代码来源:OffsetLengthStartComparator.java

示例11: removeOverlapsAndSort

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Removes overlapping offsets (keeping the ones preferred by {@code comparator})
 * and returns the survivors, optionally re-sorted by {@code startComparator}.
 *
 * @param offsets         offsets to process
 * @param comparator      initial OffsetLengthStartComparator to use to rule out overlaps
 * @param startComparator comparator for final sort
 * @return sorted list of offsets
 */
public static List<OffsetAttribute> removeOverlapsAndSort(
    List<OffsetAttribute> offsets, OffsetLengthStartComparator comparator,
    OffsetStartComparator startComparator) {
  // Nothing to de-overlap with fewer than two entries.
  if (offsets == null || offsets.size() < 2) {
    return offsets;
  }

  // Greedy pass in comparator order: keep an offset only if none of its
  // character positions has already been claimed by a kept offset.
  Collections.sort(offsets, comparator);
  Set<Integer> claimed = new HashSet<>();
  List<OffsetAttribute> kept = new ArrayList<>();
  for (OffsetAttribute candidate : offsets) {
    if (alreadySeen(candidate, claimed)) {
      continue;
    }
    kept.add(candidate);
    for (int pos = candidate.startOffset(); pos < candidate.endOffset(); pos++) {
      claimed.add(pos);
    }
  }

  if (startComparator != null) {
    Collections.sort(kept, startComparator);
  }
  return kept;
}
 
开发者ID:tballison,项目名称:lucene-addons,代码行数:29,代码来源:OffsetUtil.java

示例12: simpleAnalyze

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Runs {@code analyzer} over every text in the request and collects the
 * produced tokens. Position and offset counters accumulate across texts so
 * that tokens from later texts carry request-global values.
 *
 * @param request  the analyze request supplying the texts
 * @param analyzer the analyzer to run
 * @param field    the field name passed to the analyzer (affects the gaps)
 * @return all tokens from all texts, in order
 */
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
    int lastPosition = -1;
    int lastOffset = 0;
    for (String text : request.text()) {
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute type = stream.addAttribute(TypeAttribute.class);
            PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
                // Only advance the position for real increments (0 = stacked token).
                if (increment > 0) {
                    lastPosition = lastPosition + increment;
                }
                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                    lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), null));

            }
            stream.end();
            // After end() the attributes hold the stream's final values; fold them
            // (plus the analyzer's gaps) into the running totals so the next
            // text's tokens are shifted past this one.
            lastOffset += offset.endOffset();
            lastPosition += posIncr.getPositionIncrement();

            lastPosition += analyzer.getPositionIncrementGap(field);
            lastOffset += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return tokens;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:35,代码来源:TransportAnalyzeAction.java

示例13: analyze

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Consumes {@code stream}, appending each token to the {@code tokens} field.
 * The {@code lastPosition}/{@code lastOffset} fields accumulate across calls
 * so repeated invocations yield request-global positions and offsets.
 * Extra token attributes named in {@code includeAttributes} are captured via
 * extractExtendedAttributes. The stream is always closed.
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            // Only advance the position for real increments (0 = stacked token).
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        // After end() the attributes hold the stream's final values; fold them
        // (plus the analyzer's gaps) into the running totals for the next call.
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:32,代码来源:TransportAnalyzeAction.java

示例14: simpleAnalyze

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Runs {@code analyzer} over every text in the request and collects the
 * produced tokens. Position and offset counters accumulate across texts so
 * that tokens from later texts carry request-global values.
 * (Older variant: no PositionLengthAttribute is reported.)
 *
 * @param request  the analyze request supplying the texts
 * @param analyzer the analyzer to run
 * @param field    the field name passed to the analyzer (affects the gaps)
 * @return all tokens from all texts, in order
 */
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
    int lastPosition = -1;
    int lastOffset = 0;
    for (String text : request.text()) {
        try (TokenStream stream = analyzer.tokenStream(field, text)) {
            stream.reset();
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            TypeAttribute type = stream.addAttribute(TypeAttribute.class);

            while (stream.incrementToken()) {
                int increment = posIncr.getPositionIncrement();
                // Only advance the position for real increments (0 = stacked token).
                if (increment > 0) {
                    lastPosition = lastPosition + increment;
                }
                tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type(), null));

            }
            stream.end();
            // After end() the attributes hold the stream's final values; fold them
            // (plus the analyzer's gaps) into the running totals so the next
            // text's tokens are shifted past this one.
            lastOffset += offset.endOffset();
            lastPosition += posIncr.getPositionIncrement();

            lastPosition += analyzer.getPositionIncrementGap(field);
            lastOffset += analyzer.getOffsetGap(field);
        } catch (IOException e) {
            throw new ElasticsearchException("failed to analyze", e);
        }
    }
    return tokens;
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:33,代码来源:TransportAnalyzeAction.java

示例15: analyze

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; //导入方法依赖的package包/类
/**
 * Consumes {@code stream}, appending each token to the {@code tokens} field.
 * The {@code lastPosition}/{@code lastOffset} fields accumulate across calls
 * so repeated invocations yield request-global positions and offsets.
 * (Older variant: no PositionLengthAttribute is reported.) The stream is
 * always closed.
 */
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            // Only advance the position for real increments (0 = stacked token).
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        // After end() the attributes hold the stream's final values; fold them
        // (plus the analyzer's gaps) into the running totals for the next call.
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:31,代码来源:TransportAnalyzeAction.java


注:本文中的org.apache.lucene.analysis.tokenattributes.OffsetAttribute.endOffset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。