This article collects and summarizes typical usage examples of the Java class org.apache.lucene.analysis.tokenattributes.TypeAttribute. If you are wondering what the TypeAttribute class is for, or how and where to use it, the curated class examples below may help.
The TypeAttribute class belongs to the org.apache.lucene.analysis.tokenattributes package. A total of 15 code examples of the TypeAttribute class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
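Before the individual examples, here is a minimal sketch of the common pattern for reading a token's type: obtain (or add) the TypeAttribute on a TokenStream before consuming it, then call type() inside the incrementToken() loop. The tokenizer and sample text are illustrative only; a recent Lucene release with a no-argument StandardTokenizer constructor is assumed.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class TypeAttributeDemo {
    public static void main(String[] args) throws IOException {
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader("lucene 4 demo"));
        // Attributes must be obtained before the stream is consumed.
        CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = tokenizer.addAttribute(TypeAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            // StandardTokenizer reports types such as <ALPHANUM> and <NUM>.
            System.out.println(termAtt + " -> " + typeAtt.type());
        }
        tokenizer.end();
        tokenizer.close();
    }
}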
Example 1: assertTokenStream
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());
        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }
        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);
        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }
        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }
        index++;
    }
    tokenStream.end();
}
Developer ID: open-korean-text, Project: elasticsearch-analysis-openkoreantext, Lines: 25, Source: TokenStreamAssertions.java
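For context, a hypothetical call to this helper could look like the sketch below; the tokenizer, input text, and expected values are invented for illustration (StandardTokenizer tags plain words as <ALPHANUM>):

Tokenizer tokenizer = new StandardTokenizer();
tokenizer.setReader(new StringReader("hello world"));
assertTokenStream(tokenizer,
        new String[]{"hello", "world"},            // expected terms
        new String[]{"<ALPHANUM>", "<ALPHANUM>"},  // expected types (pass null to skip this check)
        new int[]{0, 6},                           // expected start offsets
        new int[]{5, 11});                         // expected end offsets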
Example 2: PrefixAwareTokenFilter
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
    super(suffix);
    this.suffix = suffix;
    this.prefix = prefix;
    prefixExhausted = false;
    termAtt = addAttribute(CharTermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    payloadAtt = addAttribute(PayloadAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);
    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Example 3: LTPTokenizer
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Example 4: copyTo
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
@Override
public void copyTo(AttributeImpl target) {
    if (target instanceof Token) {
        final Token to = (Token) target;
        to.reinit(this);
        // reinit shares the payload, so clone it:
        if (payload != null) {
            to.payload = payload.clone();
        }
    } else {
        super.copyTo(target);
        ((OffsetAttribute) target).setOffset(startOffset, endOffset);
        ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
        ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
        ((FlagsAttribute) target).setFlags(flags);
        ((TypeAttribute) target).setType(type);
    }
}
Example 5: normalizeQueryTarget
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
@Override
public BytesRef normalizeQueryTarget(String val, boolean strict, String fieldName, boolean appendExtraDelim) throws IOException {
    TokenStream ts = getQueryAnalyzer().tokenStream(fieldName, val);
    try {
        ts.reset();
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
        String matchType = strict ? INDEXED_TOKEN_TYPE : NORMALIZED_TOKEN_TYPE;
        while (ts.incrementToken()) {
            if (matchType.equals(typeAtt.type())) {
                BytesRefBuilder ret = new BytesRefBuilder();
                ret.copyChars(termAtt.toString());
                if (!strict || appendExtraDelim) {
                    ret.append(delimBytes, 0, delimBytes.length);
                }
                return ret.get();
            }
        }
        return new BytesRef(BytesRef.EMPTY_BYTES);
    } finally {
        ts.close();
    }
}
Example 6: testTypeForPayload1
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
/** verify that payload gets picked up for 1st group of tokens */
public void testTypeForPayload1() throws IOException {
    TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(new TokenArrayTokenizer(tokensWithPayloads), new String[] {"normalized", "filing", "prefix"},
            "joined", "normalized", "!", false, false);
    CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
    ttjf.reset();
    assertTrue(ttjf.incrementToken());
    assertEquals("unconsoled!Unconsoled!The ", termAtt.toString());
    assertEquals("joined", typeAtt.type());
    assertEquals("payload1", payloadAtt.getPayload().utf8ToString());
    assertTrue(ttjf.incrementToken());
    assertEquals("room with a view!Room With A View!A ", termAtt.toString());
    assertEquals("joined", typeAtt.type());
    assertNull(payloadAtt.getPayload());
    assertFalse(ttjf.incrementToken());
}
Example 7: testTypeForPayload2
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
/** verify that payload gets picked up for 2nd group of tokens */
public void testTypeForPayload2() throws IOException {
    TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(new TokenArrayTokenizer(tokensWithPayloads), new String[] {"normalized", "filing", "prefix"},
            "joined", "filing", "!", false, false);
    CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
    ttjf.reset();
    assertTrue(ttjf.incrementToken());
    assertEquals("unconsoled!Unconsoled!The ", termAtt.toString());
    assertEquals("joined", typeAtt.type());
    assertNull(payloadAtt.getPayload());
    assertTrue(ttjf.incrementToken());
    assertEquals("room with a view!Room With A View!A ", termAtt.toString());
    assertEquals("joined", typeAtt.type());
    assertEquals("payload2", payloadAtt.getPayload().utf8ToString());
    assertFalse(ttjf.incrementToken());
}
Example 8: testShorthand2
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
@Test
public void testShorthand2() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"filing\": \"something\", \"prefix\": \"The \"}"));
    tokenizer.reset();
    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());
    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());
    assertFalse(tokenizer.incrementToken());
}
Example 9: testShorthand3
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
@Test
public void testShorthand3() throws IOException {
    JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
    tokenizer.setReader(new StringReader("{\"prefix\": \"The \", \"filing\": \"something\"}"));
    tokenizer.reset();
    assertTrue(tokenizer.incrementToken());
    assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());
    assertTrue(tokenizer.incrementToken());
    assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
    assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
    assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());
    assertFalse(tokenizer.incrementToken());
}
Example 10: tokensFromAnalysis
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public static MyToken[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException {
    TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute positionIncrementAttr = stream.addAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
    OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);
    ArrayList<MyToken> tokenList = new ArrayList<MyToken>();
    stream.reset(); // required before incrementToken() on recent Lucene versions
    while (stream.incrementToken()) {
        tokenList.add(new MyToken(term.toString(), positionIncrementAttr.getPositionIncrement(), typeAttr.type(),
                offsetAttr.startOffset(), offsetAttr.endOffset()));
    }
    stream.end();
    stream.close();
    return tokenList.toArray(new MyToken[0]);
}
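A possible call site for this helper is sketched below; MyToken is the project's own value class, so only its toString() output is relied on, and the analyzer, text, and field name are arbitrary:

Analyzer analyzer = new StandardAnalyzer();
MyToken[] tokens = tokensFromAnalysis(analyzer, "Lucene in Action", "body");
for (MyToken token : tokens) {
    System.out.println(token);  // prints whatever MyToken.toString() produces
}
analyzer.close();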
Example 11: testCreateComponents
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public void testCreateComponents() throws Exception
{
    String text = "中華人民共和國很遼闊";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Example 12: testIssue
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public void testIssue() throws Exception
{
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    args.put("enableNormalization", "true");
    HanLPTokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create();
    String text = "會辦台星保證最低價的原因?";
    tokenizer.setReader(new StringReader(text));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Example 13: testCreateComponents
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
public void testCreateComponents() throws Exception
{
    String text = "中華人民共和國很遼闊";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPIndexAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Example 14: tokenize
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
/**
 * Tokenize the given input using a {@link URLTokenizer}. Settings which have been set on this {@link URLTokenFilter}
 * will be passed along to the tokenizer.
 * @param input a string to be tokenized
 * @return a list of tokens extracted from the input string
 * @throws IOException
 */
private List<Token> tokenize(String input) throws IOException {
    List<Token> tokens = new ArrayList<>();
    URLTokenizer tokenizer = new URLTokenizer();
    // create a copy of the parts list to avoid ConcurrentModificationException when sorting
    tokenizer.setParts(new ArrayList<>(parts));
    tokenizer.setUrlDecode(urlDeocde);
    tokenizer.setTokenizeHost(tokenizeHost);
    tokenizer.setTokenizePath(tokenizePath);
    tokenizer.setTokenizeQuery(tokenizeQuery);
    tokenizer.setAllowMalformed(allowMalformed || passthrough);
    tokenizer.setTokenizeMalformed(tokenizeMalformed);
    tokenizer.setReader(new StringReader(input));
    tokenizer.reset();
    String term;
    URLPart part;
    OffsetAttribute offset;
    while (tokenizer.incrementToken()) {
        term = tokenizer.getAttribute(CharTermAttribute.class).toString();
        part = URLPart.fromString(tokenizer.getAttribute(TypeAttribute.class).type());
        offset = tokenizer.getAttribute(OffsetAttribute.class);
        tokens.add(new Token(term, part, offset.startOffset(), offset.endOffset()));
    }
    return tokens;
}
Example 15: collectExtractedNouns
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the required package/class
protected List<TestToken> collectExtractedNouns(TokenStream stream) throws IOException {
    CharTermAttribute charTermAtt = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offSetAtt = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
    List<TestToken> extractedTokens = Lists.newArrayList();
    while (stream.incrementToken()) {
        TestToken t = getToken(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset());
        System.out.println("termAtt.term() : " + charTermAtt.toString());
        System.out.println("startoffSetAtt : " + offSetAtt.startOffset());
        System.out.println("endoffSetAtt : " + offSetAtt.endOffset());
        System.out.println("typeAttr : " + typeAttr.toString());
        extractedTokens.add(t);
    }
    return extractedTokens;
}