當前位置: 首頁>>代碼示例>>Java>>正文


Java TypeAttribute.type方法代碼示例

本文整理匯總了Java中org.apache.lucene.analysis.tokenattributes.TypeAttribute.type方法的典型用法代碼示例。如果您正苦於以下問題:Java TypeAttribute.type方法的具體用法?Java TypeAttribute.type怎麽用?Java TypeAttribute.type使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.lucene.analysis.tokenattributes.TypeAttribute的用法示例。


在下文中一共展示了TypeAttribute.type方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。

示例1: main

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
/**
 * Demonstrates SmartChineseAnalyzer: tokenizes a Chinese sample text using a
 * case-insensitive stop-word set (a few hand-picked words merged with the
 * analyzer's built-in defaults) and prints every surviving token as
 * "term | type".
 */
public static void main(final String[] args) {
  try {
    // Sample text describing how a Lucene analyzer pipeline works.
    final String text = "lucene分析器使用分詞器和過濾器構成一個“管道”,文本在流經這個管道後成為可以進入索引的最小單位,因此,一個標準的分析器有兩個部分組成,一個是分詞器tokenizer,它用於將文本按照規則切分為一個個可以進入索引的最小單位。另外一個是TokenFilter,它主要作用是對切出來的詞進行進一步的處理(如去掉敏感詞、英文大小寫轉換、單複數處理)等。lucene中的Tokenstram方法首先創建一個tokenizer對象處理Reader對象中的流式文本,然後利用TokenFilter對輸出流進行過濾處理";
    // Ignore-case stop-word set seeded with a few hand-picked entries.
    final CharArraySet stopWords = new CharArraySet(0, true);
    final ArrayList<String> myStopWords = CollectionLiterals.<String>newArrayList("的", "在", "了", "呢", ",", "0", ":", ",", "是", "流");
    for (final String stop : myStopWords) {
      stopWords.add(stop);
    }
    // Merge in the analyzer's built-in default stop words.
    for (final Object builtIn : SmartChineseAnalyzer.getDefaultStopSet()) {
      stopWords.add(builtIn);
    }
    final SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(stopWords);
    final TokenStream stream = analyzer.tokenStream("field", text);
    final CharTermAttribute term = stream.<CharTermAttribute>addAttribute(CharTermAttribute.class);
    final TypeAttribute type = stream.<TypeAttribute>addAttribute(TypeAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      InputOutput.<String>println(term.toString() + " | " + type.type());
    }
    stream.end();
    stream.close();
  } catch (Throwable t) {
    throw Exceptions.sneakyThrow(t);
  }
}
 
開發者ID:East196,項目名稱:maker,代碼行數:33,代碼來源:SmartCn.java

示例2: TokenTerm

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
/**
 * Captures a snapshot of the current token's attributes. Every attribute
 * parameter may be null, in which case a neutral default is stored instead
 * (null for term/type, 0 for offsets, increment and flags).
 */
public TokenTerm(final CharTermAttribute termAtt, final PositionIncrementAttribute posIncrAtt,
		final OffsetAttribute offsetAtt, final TypeAttribute typeAtt, final FlagsAttribute flagsAtt) {
	this.term = (termAtt == null) ? null : termAtt.toString();
	if (offsetAtt == null) {
		this.start = 0;
		this.end = 0;
	} else {
		this.start = offsetAtt.startOffset();
		this.end = offsetAtt.endOffset();
	}
	this.increment = (posIncrAtt == null) ? 0 : posIncrAtt.getPositionIncrement();
	this.type = (typeAtt == null) ? null : typeAtt.type();
	this.flags = (flagsAtt == null) ? 0 : flagsAtt.getFlags();
}
 
開發者ID:jaeksoft,項目名稱:opensearchserver,代碼行數:10,代碼來源:TokenTerm.java

示例3: createComponents

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {

	final TypeTokenizer tokenizer = new TypeTokenizer(reader);
	final TypeAttribute typeAttribute = tokenizer.getAttribute(TypeAttribute.class);

	// Filter that drops SYMBOL-typed tokens and passes everything else
	// through unchanged. There is no length limit on tokens.
	TokenFilter filter = new TokenFilter(tokenizer) {

		@Override
		public boolean incrementToken() throws IOException {
			while (input.incrementToken()) {
				// Bug fix: type() returns a String, so comparing with ==
				// only works when both sides happen to be interned.
				// Use equals() for a reliable value comparison.
				if (TypeTokenizer.SYMBOL.equals(typeAttribute.type())) {
					continue; // skip symbol tokens
				}
				return true; // kept a non-symbol token
			}
			return false; // input exhausted
		}
	};

	return new TokenStreamComponents(tokenizer, filter);
}
 
開發者ID:gncloud,項目名稱:fastcatsearch3,代碼行數:47,代碼來源:PrimaryWordAnalyzer.java

示例4: testOutputComponentTypes

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
/**
 * Verifies a split/join filter chain: "even"-typed tokens are forked into
 * (even_orig, even_fork) pairs, "odd"-typed tokens into (odd_orig, odd_fork)
 * pairs, and the even variants are additionally joined into a "joined" token.
 * Emitted tokens are then checked in a repeating 5-token cycle:
 * even_orig, even_fork, joined, odd_orig, odd_fork.
 *
 * @throws IOException if the token stream fails
 */
public void testOutputComponentTypes() throws IOException {
  String test = "The quick red fox jumped over the lazy brown dogs";

  // Fork "even"-typed tokens as "even_fork", re-typing the original "even_orig".
  TokenTypeSplitFilter ttsf = new TokenTypeSplitFilter(new Blah(whitespaceMockTokenizer(test)), Collections.singleton("even"),
      Collections.EMPTY_SET, "even_fork", "even_orig");
  // Likewise fork "odd"-typed tokens as "odd_fork"/"odd_orig".
  TokenTypeSplitFilter ttsfOdd = new TokenTypeSplitFilter(ttsf, Collections.singleton("odd"),
      Collections.EMPTY_SET, "odd_fork", "odd_orig");
  // Join the even variants into a single "joined" token, parts separated by "!".
  TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(ttsfOdd, new String[] {"even_orig", "even_fork"}, "joined", null, "!", true, true);
  int count = 0;
  TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
  OffsetAttribute offsetAtt = ttjf.getAttribute(OffsetAttribute.class);
  PositionIncrementAttribute posIncrAtt = ttjf.getAttribute(PositionIncrementAttribute.class);
  CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
  PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
  // Previous token's state, used to check fork/join tokens against their source.
  String lastTerm = null;
  int lastStartOffset = -1;
  int lastEndOffset = -1;
  ttjf.reset();
  while (ttjf.incrementToken()) {
    String term = termAtt.toString();
    String type = typeAtt.type();
    int startOffset = offsetAtt.startOffset();
    int endOffset = offsetAtt.endOffset();
    int posIncr = posIncrAtt.getPositionIncrement();
    BytesRef payload = payloadAtt.getPayload();
    // Position within the expected 5-token output cycle.
    switch (count % 5) {
      case 0:
        // Original even token: advances position, starts right after the previous word.
        assertEquals("even_orig", type);
        assertEquals(1, posIncr);
        assertEquals(lastEndOffset + 1, startOffset);
        assertNull(payload);
        break;
      case 1:
        // Forked copy of the even token: same term and offsets, zero increment.
        assertEquals("even_fork", type);
        assertEquals(lastTerm, term);
        assertEquals(0, posIncr);
        assertEquals(lastStartOffset, startOffset);
        assertEquals(lastEndOffset, endOffset);
        assertNull(payload);
        break;
      case 2:
        // Joined token: orig and fork concatenated with "!"; both halves must match.
        assertEquals("joined", type);
        assertEquals(0, posIncr);
        assertEquals(lastStartOffset, startOffset);
        String[] split = term.split("!");
        assertEquals(split[0], split[1]);
        assertNull(payload);
        break;
      case 3:
        // Original odd token: advances position like case 0.
        assertEquals("odd_orig", type);
        assertEquals(1, posIncr);
        assertEquals(lastEndOffset + 1, startOffset);
        assertNull(payload);
        break;
      case 4:
        // Forked copy of the odd token: mirrors case 1.
        assertEquals("odd_fork", type);
        assertEquals(lastTerm, term);
        assertEquals(0, posIncr);
        assertEquals(lastStartOffset, startOffset);
        assertEquals(lastEndOffset, endOffset);
        assertNull(payload);
        break;
    }
    lastTerm = term;
    lastStartOffset = startOffset;
    lastEndOffset = endOffset;
    count++;
  }
  // 10 input words -> 5 even/odd pairs, each pair emitting 5 tokens = 25 total.
  assertTrue(count + " does not equal: " + 25, count == 25);

}
 
開發者ID:upenn-libraries,項目名稱:solrplugins,代碼行數:72,代碼來源:TokenTypeJoinFilterTest.java

示例5: ponctuer

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
/**
 * Lancer l'étiquetage d'un texte, en rétablissant les espaces autour des
 * ponctuations (selon les normes françaises).
 * 
 * @param args
 * @throws IOException
 */
public void ponctuer(Analyzer analyzer, String text, String field) throws IOException
{
  TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
  // PositionIncrementAttribute posinc = (PositionIncrementAttribute)
  // stream.addAttribute(PositionIncrementAttribute.class);
  TypeAttribute type = stream.addAttribute(TypeAttribute.class);
  // OffsetAttribute offset = (OffsetAttribute)
  // stream.addAttribute(OffsetAttribute.class);
  String tok, tag;
  char c;
  boolean noSpaceBefore = true;
  while (stream.incrementToken()) {
    // pour accumulation dans un objet
    // tokenList.add(new MyToken(term.term(), posinc.getPositionIncrement(),
    // type.type(), offset.startOffset(), offset.endOffset()));
    tok = term.toString();
    c = tok.charAt(0);
    tag = type.type();
    // avant un mot
    if (noSpaceBefore)
      ;
    // espace insécable avant ponctuation double (?)
    else if (";".equals(tok) || ":".equals(tok) || "!".equals(tok) || "?".equals(tok) || "»".equals(tok))
      out.print(' ');
    // avant : pas d'espace
    else if (c == ',' || c == '.' || c == '…' || c == ')' || tok.startsWith("</") || tag.equals("PUNCT")
        || tag.equals("S"))
      ;
    else
      out.print(' ');

    out.print(tok);

    // après espace insécable
    if (tok.equals("«"))
      out.print(' ');
    // pas d'espace après un tag ouvrant
    if (c == '<' && !(tok.charAt(0) == '/'))
      noSpaceBefore = true;
    else
      noSpaceBefore = false;
  }
  out.println();
}
 
開發者ID:oeuvres,項目名稱:Alix,代碼行數:53,代碼來源:Demo.java

示例6: getType

import org.apache.lucene.analysis.tokenattributes.TypeAttribute; //導入方法依賴的package包/類
/**
 * Returns the type of the current token held by the given attribute source.
 * Uses addAttribute so a missing TypeAttribute is created rather than null.
 */
public static String getType(AttributeSource source) {
	return source.addAttribute(TypeAttribute.class).type();
}
 
開發者ID:flash0729,項目名稱:ansj-seg-for-lucene3,代碼行數:5,代碼來源:AnalyzerUtils.java


注:本文中的org.apache.lucene.analysis.tokenattributes.TypeAttribute.type方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。