This article collects typical usage examples of the Java method org.apache.lucene.analysis.tokenattributes.TypeAttribute.type. If you are wondering what TypeAttribute.type does, how to call it, or where to find it used in real code, the hand-picked snippets below should help. You can also explore further examples for the enclosing class, org.apache.lucene.analysis.tokenattributes.TypeAttribute.
Six code examples of the TypeAttribute.type method are shown below, ordered by popularity by default.
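Before the collected snippets, here is a minimal sketch (not taken from any of them, and assuming a reasonably recent Lucene version) of the usual pattern around TypeAttribute.type(): register the attribute, then read it inside the reset()/incrementToken()/end() loop. StandardTokenizer, for instance, reports types such as "<ALPHANUM>" and "<NUM>"; the exact labels depend on the tokenizer in use.

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class TypeAttributeDemo {
    public static void main(String[] args) throws IOException {
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("field", "Lucene 7 was released in 2017")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            TypeAttribute type = ts.addAttribute(TypeAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                // prints lines such as "lucene | <ALPHANUM>" and "7 | <NUM>"
                System.out.println(term + " | " + type.type());
            }
            ts.end();
        }
    }
}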
Example 1: main
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
public static void main(final String[] args) {
    try {
        final String text = "lucene分析器使用分詞器和過濾器構成一個“管道”,文本在流經這個管道後成為可以進入索引的最小單位,因此,一個標準的分析器有兩個部分組成,一個是分詞器tokenizer,它用於將文本按照規則切分為一個個可以進入索引的最小單位。另外一個是TokenFilter,它主要作用是對切出來的詞進行進一步的處理(如去掉敏感詞、英文大小寫轉換、單複數處理)等。lucene中的Tokenstram方法首先創建一個tokenizer對象處理Reader對象中的流式文本,然後利用TokenFilter對輸出流進行過濾處理";
        // Build a stop-word set from custom words plus SmartChineseAnalyzer's defaults.
        // CollectionLiterals, InputOutput and Exceptions are Xtend runtime helpers (org.eclipse.xtext.xbase.lib).
        final ArrayList<String> myStopWords = CollectionLiterals.<String>newArrayList("的", "在", "了", "呢", ",", "0", ":", ",", "是", "流");
        final CharArraySet stopWords = new CharArraySet(0, true);
        for (final String word : myStopWords) {
            stopWords.add(word);
        }
        final CharArraySet defaultStopSet = SmartChineseAnalyzer.getDefaultStopSet();
        final Iterator<Object> itor = defaultStopSet.iterator();
        while (itor.hasNext()) {
            stopWords.add(itor.next());
        }
        final SmartChineseAnalyzer sca = new SmartChineseAnalyzer(stopWords);
        final TokenStream ts = sca.tokenStream("field", text);
        final CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
        final TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // Print each token together with the type reported by the analyzer.
            InputOutput.<String>println(ch.toString() + " | " + type.type());
        }
        ts.end();
        ts.close();
    } catch (Throwable _e) {
        throw Exceptions.sneakyThrow(_e);
    }
}
Example 2: TokenTerm
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
public TokenTerm(final CharTermAttribute termAtt, final PositionIncrementAttribute posIncrAtt,
        final OffsetAttribute offsetAtt, final TypeAttribute typeAtt, final FlagsAttribute flagsAtt) {
    this.term = termAtt != null ? termAtt.toString() : null;
    this.start = offsetAtt != null ? offsetAtt.startOffset() : 0;
    this.end = offsetAtt != null ? offsetAtt.endOffset() : 0;
    this.increment = posIncrAtt != null ? posIncrAtt.getPositionIncrement() : 0;
    this.type = typeAtt != null ? typeAtt.type() : null;
    this.flags = flagsAtt != null ? flagsAtt.getFlags() : 0;
}
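The constructor takes a null-safe snapshot of each attribute, so a caller would typically build one TokenTerm per token while walking a stream. A minimal sketch of such a loop (the collecting code is an assumption, not part of the original class; only the constructor signature above is given):

// Hypothetical caller: collect a TokenTerm snapshot for every token in a stream.
List<TokenTerm> terms = new ArrayList<>();
try (TokenStream ts = analyzer.tokenStream("field", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        // Each TokenTerm copies the current attribute values, so it stays valid
        // after the stream moves on to the next token.
        terms.add(new TokenTerm(termAtt, posIncrAtt, offsetAtt, typeAtt, flagsAtt));
    }
    ts.end();
}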
Example 3: createComponents
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final TypeTokenizer tokenizer = new TypeTokenizer(reader);
    // try {
    //     tokenizer.reset();
    // } catch (IOException e) {
    //     logger.error("tokenizer reset error", e);
    // }
    // Process the tokenized word segments one by one.
    final CharTermAttribute charTermAttribute = tokenizer.getAttribute(CharTermAttribute.class);
    final TypeAttribute typeAttribute = tokenizer.getAttribute(TypeAttribute.class);
    TokenFilter filter = new TokenFilter(tokenizer) {
        @Override
        public boolean incrementToken() throws IOException {
            boolean found = false;
            while (input.incrementToken()) {
                found = false;
                // Drop symbol tokens entirely.
                if (typeAttribute.type() == TypeTokenizer.SYMBOL) {
                    // logger.debug("term : {}", charTermAttribute.toString());
                    continue;
                }
                // No length limit is applied.
                // if (typeAttribute.type() == TypeTokenizer.HANGUL && charTermAttribute.length() > 10) {
                //     continue;
                // }
                //
                // if (charTermAttribute.length() > 15) {
                //     continue;
                // }
                found = true;
                break;
            }
            return found;
        }
    };
    return new TokenStreamComponents(tokenizer, filter);
}
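A hedged usage sketch of the analyzer above (the enclosing Analyzer subclass is not shown in the snippet, so SymbolSkippingAnalyzer is a hypothetical name for it, and the behaviour of the project-specific TypeTokenizer is inferred from the filter logic):

// Hypothetical: drive the analyzer and confirm that SYMBOL tokens never reach the consumer.
Analyzer analyzer = new SymbolSkippingAnalyzer(); // hypothetical name of the enclosing class
try (TokenStream ts = analyzer.tokenStream("body", new StringReader("한글 + 123 !!"))) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        // Only tokens whose type is not TypeTokenizer.SYMBOL are emitted here.
        System.out.println(term + " | " + type.type());
    }
    ts.end();
}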
Example 4: testOutputComponentTypes
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
public void testOutputComponentTypes() throws IOException {
    String test = "The quick red fox jumped over the lazy brown dogs";
    TokenTypeSplitFilter ttsf = new TokenTypeSplitFilter(new Blah(whitespaceMockTokenizer(test)), Collections.singleton("even"),
            Collections.EMPTY_SET, "even_fork", "even_orig");
    TokenTypeSplitFilter ttsfOdd = new TokenTypeSplitFilter(ttsf, Collections.singleton("odd"),
            Collections.EMPTY_SET, "odd_fork", "odd_orig");
    TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(ttsfOdd, new String[] {"even_orig", "even_fork"}, "joined", null, "!", true, true);
    int count = 0;
    TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
    OffsetAttribute offsetAtt = ttjf.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncrAtt = ttjf.getAttribute(PositionIncrementAttribute.class);
    CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
    PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
    String lastTerm = null;
    int lastStartOffset = -1;
    int lastEndOffset = -1;
    ttjf.reset();
    while (ttjf.incrementToken()) {
        String term = termAtt.toString();
        String type = typeAtt.type();
        int startOffset = offsetAtt.startOffset();
        int endOffset = offsetAtt.endOffset();
        int posIncr = posIncrAtt.getPositionIncrement();
        BytesRef payload = payloadAtt.getPayload();
        switch (count % 5) {
            case 0:
                assertEquals("even_orig", type);
                assertEquals(1, posIncr);
                assertEquals(lastEndOffset + 1, startOffset);
                assertNull(payload);
                break;
            case 1:
                assertEquals("even_fork", type);
                assertEquals(lastTerm, term);
                assertEquals(0, posIncr);
                assertEquals(lastStartOffset, startOffset);
                assertEquals(lastEndOffset, endOffset);
                assertNull(payload);
                break;
            case 2:
                assertEquals("joined", type);
                assertEquals(0, posIncr);
                assertEquals(lastStartOffset, startOffset);
                String[] split = term.split("!");
                assertEquals(split[0], split[1]);
                assertNull(payload);
                break;
            case 3:
                assertEquals("odd_orig", type);
                assertEquals(1, posIncr);
                assertEquals(lastEndOffset + 1, startOffset);
                assertNull(payload);
                break;
            case 4:
                assertEquals("odd_fork", type);
                assertEquals(lastTerm, term);
                assertEquals(0, posIncr);
                assertEquals(lastStartOffset, startOffset);
                assertEquals(lastEndOffset, endOffset);
                assertNull(payload);
                break;
        }
        lastTerm = term;
        lastStartOffset = startOffset;
        lastEndOffset = endOffset;
        count++;
    }
    assertTrue(count + " does not equal: " + 25, count == 25);
}
Example 5: ponctuer
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
/**
 * Run the tagging of a text, restoring the spaces around punctuation
 * marks (according to French typographic rules).
 *
 * @param analyzer the analyzer that produces the tokens
 * @param text the text to re-space
 * @param field the field name passed to the analyzer
 * @throws IOException
 */
public void ponctuer(Analyzer analyzer, String text, String field) throws IOException
{
    TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    // PositionIncrementAttribute posinc = (PositionIncrementAttribute)
    //     stream.addAttribute(PositionIncrementAttribute.class);
    TypeAttribute type = stream.addAttribute(TypeAttribute.class);
    // OffsetAttribute offset = (OffsetAttribute)
    //     stream.addAttribute(OffsetAttribute.class);
    String tok, tag;
    char c;
    boolean noSpaceBefore = true;
    stream.reset(); // required before incrementToken() in current Lucene versions
    while (stream.incrementToken()) {
        // to accumulate into an object instead of printing:
        // tokenList.add(new MyToken(term.term(), posinc.getPositionIncrement(),
        //     type.type(), offset.startOffset(), offset.endOffset()));
        tok = term.toString();
        c = tok.charAt(0);
        tag = type.type();
        // start of a word: nothing to print before it
        if (noSpaceBefore)
            ;
        // non-breaking space before "double" punctuation marks (?)
        else if (";".equals(tok) || ":".equals(tok) || "!".equals(tok) || "?".equals(tok) || "»".equals(tok))
            out.print(' ');
        // no space before these
        else if (c == ',' || c == '.' || c == '…' || c == ')' || tok.startsWith("</") || tag.equals("PUNCT")
                || tag.equals("S"))
            ;
        else
            out.print(' ');
        out.print(tok);
        // non-breaking space after an opening guillemet
        if (tok.equals("«"))
            out.print(' ');
        // no space after an opening tag (the original tested tok.charAt(0) == '/',
        // which can never be true once c == '<'; startsWith("</") matches the intent)
        if (c == '<' && !tok.startsWith("</"))
            noSpaceBefore = true;
        else
            noSpaceBefore = false;
    }
    stream.end();
    stream.close();
    out.println();
}
Example 6: getType
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; // import the package/class this method depends on
public static String getType(AttributeSource source) {
    TypeAttribute attr = source.addAttribute(TypeAttribute.class);
    return attr.type();
}