This page collects typical usage examples of the Java class org.apache.lucene.analysis.tokenattributes.OffsetAttribute. If you are wondering what OffsetAttribute is for or how to use it, the curated examples below may help.
OffsetAttribute belongs to the org.apache.lucene.analysis.tokenattributes package. 14 code examples are shown below, ordered roughly by popularity.
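Before the individual examples, here is a minimal, self-contained sketch of the canonical OffsetAttribute consumption pattern. It assumes a Lucene version where StandardAnalyzer has a no-argument constructor (5.x or later); the field name and input text are illustrative:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class OffsetAttributeDemo {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("body", "Hello offset attributes")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            ts.reset(); // required before the first incrementToken()
            while (ts.incrementToken()) {
                // start/end offsets are character positions in the original input
                System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
            }
            ts.end(); // records the final offset state
        }
    }
}

The same reset() / incrementToken() / end() / close() lifecycle appears throughout the examples below.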
Example 1: main
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中華人民 共和國 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃過飯了沒有!!!!!吃過無妨論文");
    //System.out.println(parse1);
    String text11 = "ZW321282050000000325";
    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        // System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        // System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.end(); // finalize offsets per the TokenStream contract
    tokenizer.close();
}
Example 2: assertTokenStream
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }
        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }
        index++;
    }
    tokenStream.end();
}
Developer: open-korean-text | Project: elasticsearch-analysis-openkoreantext | Lines: 25 | Source: TokenStreamAssertions.java
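A hypothetical call to the helper; WhitespaceTokenizer and every literal below are illustrative rather than taken from the project:

// Illustrative usage: the helper drives reset()/incrementToken()/end() itself.
Tokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("open korean text"));
assertTokenStream(tokenizer,
        new String[] { "open", "korean", "text" }, // expected terms
        null,                                      // skip type assertions
        new int[] { 0, 5, 12 },                    // expected start offsets
        new int[] { 4, 11, 16 });                  // expected end offsets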
Example 3: parse
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");
    try {
        try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
            List<TokenData> result = new ArrayList<>();
            CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                String original = text.substring(offset.startOffset(), offset.endOffset());
                result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
            }
            ts.end();
            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 4: findGoodEndForNoHighlightExcerpt
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents)
        throws IOException {
    try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        tokenStream.end();
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    }
}
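A hedged usage sketch of the method above: it returns a cut point at the last token boundary no later than noMatchSize, or -1 when the analyzer produces no offsets or the very first token already crosses the cap. The analyzer, field name, and 100-character cap are illustrative:

// Illustrative: trim a plain (no-highlight) excerpt at a clean token boundary.
int cut = findGoodEndForNoHighlightExcerpt(100, new StandardAnalyzer(), "body", contents);
String excerpt = cut < 0 ? "" : contents.substring(0, cut);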
Example 5: PrefixAwareTokenFilter
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);
    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
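PrefixAwareTokenFilter emits every token of the prefix stream and then every token of the suffix stream, shifting the suffix offsets so they continue where the prefix ended; registering each attribute on both streams above is what makes that copy possible. A hedged usage sketch (SingleTokenTokenStream is Lucene's single-token stream from the same miscellaneous package; the token text and analyzer are illustrative):

// Illustrative: prepend a synthetic marker token to an analyzed stream.
Token marker = new Token();
marker.setEmpty().append("_prefix_");
marker.setOffset(0, 0); // zero-width synthetic token
TokenStream joined = new PrefixAwareTokenFilter(
        new SingleTokenTokenStream(marker),
        analyzer.tokenStream("body", "regular text"));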
Example 6: assertOffsets
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

static private void assertOffsets(String inputStr, TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        // CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
        // Note: the caller is expected to have called reset() on the stream already.
        while (tokenStream.incrementToken()) {
            int start = offsetAttr.startOffset();
            int end = offsetAttr.endOffset();
            termList.add(inputStr.substring(start, end));
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        fail(e.getMessage());
    }
}
Example 7: LTPTokenizer
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Example 8: testSearch
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

@Test
public void testSearch() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "重qing");
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
    tokenStream.reset();
    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "重");
    Assert.assertEquals(offsetAttribute.startOffset(), 0);
    Assert.assertEquals(offsetAttribute.endOffset(), 1);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);
    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals(charTermAttribute.toString(), "qing");
    Assert.assertEquals(offsetAttribute.startOffset(), 1);
    Assert.assertEquals(offsetAttribute.endOffset(), 5);
    Assert.assertEquals(positionIncrementAttribute.getPositionIncrement(), 1);
    tokenStream.close();
}
Example 9: testFullPinyinFilter
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public void testFullPinyinFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陳楠");
    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.full_pinyin);
    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);
    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Example 10: testFirstLetterFilter
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public void testFirstLetterFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陳楠");
    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.first_letter);
    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);
    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.close();
}
Example 11: copyTo
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

@Override
public void copyTo(AttributeImpl target) {
    if (target instanceof Token) {
        final Token to = (Token) target;
        to.reinit(this);
        // reinit shares the payload, so clone it:
        if (payload != null) {
            to.payload = payload.clone();
        }
    } else {
        super.copyTo(target);
        ((OffsetAttribute) target).setOffset(startOffset, endOffset);
        ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
        ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
        ((FlagsAttribute) target).setFlags(flags);
        ((TypeAttribute) target).setType(type);
    }
}
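A minimal sketch of the fast path above, assuming the Lucene 4.x-era Token that still carries flags and a payload; the literal values are illustrative. The else branch is only taken when the target is some other AttributeImpl that implements all five target interfaces, so it is not exercised here:

// Copying into another Token goes through reinit(this) plus a payload clone,
// so the two tokens end up with independent payload buffers.
Token source = new Token();
source.setEmpty().append("example");
source.setOffset(0, 7);
source.setPayload(new BytesRef(new byte[] { 1, 2, 3 }));

Token copy = new Token();
source.copyTo(copy); // instanceof Token branch: copy.payload is a clone, not shared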
Example 12: WrappedTokenStream
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

public WrappedTokenStream(TokenStream tokenStream, String pText) {
    this.pText = pText;
    this.tokenStream = tokenStream;
    if (tokenStream.hasAttribute(CharTermAttribute.class)) {
        charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    }
    if (tokenStream.hasAttribute(OffsetAttribute.class)) {
        offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    }
    if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
        charsRefTermAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
    }
    if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
    }
    additionalTermAttributeLocal.init(this);
}
Example 13: testBulk
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

@Test
public void testBulk() throws IOException {
    String str = "SK, 하이닉스";
    //str = "하이닉스";
    StringReader input = new StringReader(str);
    CSVAnalyzer analyzer = new CSVAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("", input);
    tokenStream.reset();
    logger.debug("tokenStream:{}", tokenStream);
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    for (int inx = 0; tokenStream.incrementToken(); inx++) {
        String term = charTermAttribute.toString();
        logger.debug("[{}] \"{}\" {}~{}", inx, term, offsetAttribute.startOffset(), offsetAttribute.endOffset());
    }
    analyzer.close();
}
Example 14: PinyinTransformTokenFilter
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // required import

/**
 * @param input            the upstream token stream
 * @param type             whether to output pinyin abbreviations, full pinyin, or both; one of {@link #TYPE_ABBREVIATION}, {@link #TYPE_PINYIN}, {@link #TYPE_BOTH}
 * @param minTermLength    minimum length of a Chinese term for it to be converted
 * @param maxPolyphoneFreq maximum number of polyphone (multi-reading) expansions
 * @param isOutChinese     whether to also emit the original Chinese token
 */
public PinyinTransformTokenFilter(TokenStream input, int type,
        int minTermLength, int maxPolyphoneFreq, boolean isOutChinese) {
    super(input);
    this._minTermLength = minTermLength;
    this.maxPolyphoneFreq = maxPolyphoneFreq;
    if (this._minTermLength < 1) {
        this._minTermLength = 1;
    }
    if (this.maxPolyphoneFreq < 1) {
        this.maxPolyphoneFreq = Integer.MAX_VALUE;
    }
    this.isOutChinese = isOutChinese;
    this.outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    this.outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    this.type = type;
    addAttribute(OffsetAttribute.class); // offset attribute
}