This article collects typical usage examples of the Java class org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute. If you are wondering what PositionIncrementAttribute is for or how to use it, the examples below should help.
PositionIncrementAttribute belongs to the org.apache.lucene.analysis.tokenattributes package. Fifteen code examples of the class are shown below, sorted by popularity by default.
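Before the examples, here is a minimal, self-contained sketch of the standard TokenStream consumption loop that every example below follows (reset, incrementToken, end, close). It is a sketch only: it assumes a recent Lucene release where StandardAnalyzer ships with an empty stop set, and the class name PositionIncrementDemo, the field name "field", and the sample text are illustrative rather than taken from any of the projects quoted below.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class PositionIncrementDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("field", "quick brown fox")) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
            ts.reset(); // mandatory before the first incrementToken() call
            int position = -1;
            while (ts.incrementToken()) {
                // getPositionIncrement() is 1 for consecutive tokens, 0 for tokens
                // stacked at the same position (e.g. synonyms), and > 1 when tokens
                // such as stopwords were removed in between
                position += posIncrAtt.getPositionIncrement();
                System.out.println(termAtt + " @ position " + position);
            }
            ts.end(); // records the final offset/position state
        }
        analyzer.close();
    }
}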
Example 1: main
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public static void main(String[] args) throws IOException {
    List<Term> parse = ToAnalysis.parse("中华人民 共和国 成立了 ");
    System.out.println(parse);
    List<Term> parse1 = IndexAnalysis.parse("你吃过饭了没有!!!!!吃过无妨论文");
    //System.out.println(parse1);
    String text11 = "ZW321282050000000325";
    Tokenizer tokenizer = new AnsjTokenizer(new StringReader(text11), 0, true);
    CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.print(termAtt.toString() + " ");
        // System.out.print(offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "-");
        // System.out.print(positionIncrementAtt.getPositionIncrement() + "/");
    }
    tokenizer.end();
    tokenizer.close();
}
Example 2: main
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));
    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();
    final TokenStream ts = new SynonymFilter(tok, synMap, true);
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);
    ts.reset();
    int pos = -1;
    while (ts.incrementToken()) {
        pos += posIncrAtt.getPositionIncrement();
        System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
    }
    ts.end();
    ts.close();
}
Example 3: parse
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
private List<TokenData> parse(String text) {
    NamedAnalyzer analyzer = getAnalysisService().indexAnalyzers.get("test");
    try (TokenStream ts = analyzer.tokenStream("test", new StringReader(text))) {
        List<TokenData> result = new ArrayList<>();
        CharTermAttribute charTerm = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute position = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            String original = text.substring(offset.startOffset(), offset.endOffset());
            result.add(token(original, charTerm.toString(), position.getPositionIncrement()));
        }
        ts.end();
        return result;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 4: PrefixAwareTokenFilter
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;

    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);

    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Example 5: analyze
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
private Set<String> analyze(String text) throws IOException {
    Set<String> result = new HashSet<String>();
    Analyzer analyzer = configuration.getAnalyzer();
    try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            if (posIncAtt.getPositionIncrement() != 1) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }
            result.add(new String(termAtt.buffer(), 0, termAtt.length()));
        }
        ts.end();
        return result;
    }
}
Example 6: LTPTokenizer
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Example 7: testSearch
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
@Test
public void testSearch() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "重qing");
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
    tokenStream.reset();

    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals("重", charTermAttribute.toString());
    Assert.assertEquals(0, offsetAttribute.startOffset());
    Assert.assertEquals(1, offsetAttribute.endOffset());
    Assert.assertEquals(1, positionIncrementAttribute.getPositionIncrement());

    Assert.assertTrue(tokenStream.incrementToken());
    Assert.assertEquals("qing", charTermAttribute.toString());
    Assert.assertEquals(1, offsetAttribute.startOffset());
    Assert.assertEquals(5, offsetAttribute.endOffset());
    Assert.assertEquals(1, positionIncrementAttribute.getPositionIncrement());

    tokenStream.end();
    tokenStream.close();
}
Example 8: testFullPinyinFilter
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public void testFullPinyinFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");
    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.full_pinyin);
    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);
    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.end();
    lcPinyinTokenFilter.close();
}
Example 9: testFirstLetterFilter
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public void testFirstLetterFilter() throws IOException {
    LcPinyinAnalyzer analyzer = new LcPinyinAnalyzer(AnalysisSetting.search);
    TokenStream tokenStream = analyzer.tokenStream("lc", "作者 : 陈楠");
    LcPinyinTokenFilter lcPinyinTokenFilter = new LcPinyinTokenFilter(tokenStream, PinyinFilterSetting.first_letter);
    CharTermAttribute charTermAttribute = lcPinyinTokenFilter.getAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = lcPinyinTokenFilter.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = lcPinyinTokenFilter.getAttribute(PositionIncrementAttribute.class);
    lcPinyinTokenFilter.reset();
    while (lcPinyinTokenFilter.incrementToken()) {
        System.out.println(charTermAttribute.toString() + ":" + offsetAttribute.startOffset() + "," + offsetAttribute.endOffset() + ":" + positionIncrementAttribute.getPositionIncrement());
    }
    lcPinyinTokenFilter.end();
    lcPinyinTokenFilter.close();
}
Example 10: copyTo
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
@Override
public void copyTo(AttributeImpl target) {
    if (target instanceof Token) {
        final Token to = (Token) target;
        to.reinit(this);
        // reinit shares the payload, so clone it:
        if (payload != null) {
            to.payload = payload.clone();
        }
    } else {
        super.copyTo(target);
        ((OffsetAttribute) target).setOffset(startOffset, endOffset);
        ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
        ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
        ((FlagsAttribute) target).setFlags(flags);
        ((TypeAttribute) target).setType(type);
    }
}
Example 11: testShorthand2
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
@Test
public void testShorthand2() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"filing\": \"something\", \"prefix\": \"The \"}"));
    tokenizer.reset();

    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertFalse(tokenizer.incrementToken());
}
Example 12: testShorthand3
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
@Test
public void testShorthand3() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"prefix\": \"The \", \"filing\": \"something\"}"));
    tokenizer.reset();

    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

    assertFalse(tokenizer.incrementToken());
}
Example 13: tokensFromAnalysis
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public static MyToken[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException {
    TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute positionIncrementAttr = stream.addAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
    OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
    ArrayList<MyToken> tokenList = new ArrayList<MyToken>();
    stream.reset(); // required before the first incrementToken() call
    while (stream.incrementToken()) {
        tokenList.add(new MyToken(term.toString(), positionIncrementAttr.getPositionIncrement(), typeAttr.type(),
                offsetAttr.startOffset(), offsetAttr.endOffset()));
    }
    stream.end();
    stream.close();
    return tokenList.toArray(new MyToken[0]);
}
Example 14: testCreateComponents
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public void testCreateComponents() throws Exception {
    String text = "中华人民共和国很辽阔";
    for (int i = 0; i < text.length(); ++i) {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech (token type)
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Example 15: testIssue
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; // import the required package/class
public void testIssue() throws Exception {
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    args.put("enableNormalization", "true");
    HanLPTokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create();
    String text = "會辦台星保證最低價的原因?";
    tokenizer.setReader(new StringReader(text));
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // part of speech (token type)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}