当前位置: 首页>>代码示例>>Java>>正文


Java CharTermAttribute.length方法代码示例

本文整理汇总了Java中org.apache.lucene.analysis.tokenattributes.CharTermAttribute.length方法的典型用法代码示例。如果您正苦于以下问题:Java CharTermAttribute.length方法的具体用法?Java CharTermAttribute.length怎么用?Java CharTermAttribute.length使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.analysis.tokenattributes.CharTermAttribute的用法示例。


在下文中一共展示了CharTermAttribute.length方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: splitByTokenizer

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Tokenizes {@code source} with a tokenizer built by {@code tokFactory} and
 * returns the non-empty terms in order.
 *
 * @param source     text to split
 * @param tokFactory factory used by {@code loadTokenizer} to build the tokenizer
 * @return list of non-empty token strings
 * @throws IOException if the token stream fails
 */
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException{
  StringReader reader = new StringReader( source );
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()){
      // Skip zero-length tokens some tokenizers may emit.
      if( termAtt.length() > 0 )
        tokList.add( termAtt.toString() );
    }
    // TokenStream contract: end() must follow the last incrementToken().
    ts.end();
  } finally{
    // Original leaked the stream: close() releases tokenizer resources
    // (and closes the underlying reader as well).
    ts.close();
    reader.close();
  }
  return tokList;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:17,代码来源:SlowSynonymFilterFactory.java

示例2: analyze

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Analyzes {@code text} with the configured analyzer and returns the distinct
 * terms produced. Rejects zero-length tokens and tokens with a position
 * increment other than 1.
 */
private Set<String> analyze(String text) throws IOException {
    Set<String> terms = new HashSet<String>();
    Analyzer analyzer = configuration.getAnalyzer();
    try (TokenStream stream = analyzer.tokenStream("", text)) {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            if (term.length() == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            if (posInc.getPositionIncrement() != 1) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
            }
            // Copy out of the reusable term buffer before the next increment.
            terms.add(new String(term.buffer(), 0, term.length()));
        }
        stream.end();
        return terms;
    }
}
 
开发者ID:ginobefun,项目名称:elasticsearch-dynamic-synonym,代码行数:24,代码来源:SimpleSynonymMap.java

示例3: tokenize

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Runs the tokenizer over {@code input} and returns a private char[] copy of
 * each emitted term (the attribute's buffer is reused between tokens).
 */
private ArrayList<char[]> tokenize( String input ) throws IOException {

    Log.debug( "tokenize '" + input + "'" );
    ArrayList<char[]> result = new ArrayList<char[]>( );
    Tokenizer tokenizer = getTokenizerImpl( input );

    CharTermAttribute termAttr = tokenizer.addAttribute( CharTermAttribute.class );
    tokenizer.reset( );
    while ( tokenizer.incrementToken( ) ) {
      // Snapshot the reusable buffer: only the first length() chars are valid.
      int len = termAttr.length( );
      char[] chars = new char[ len ];
      System.arraycopy( termAttr.buffer( ), 0, chars, 0, len );
      result.add( chars );
    }

    return result;
  }
 
开发者ID:lucidworks,项目名称:query-autofiltering-component,代码行数:18,代码来源:QueryAutoFilteringComponent.java

示例4: tokenize

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Tokenizes {@code input} and collects each term as a freshly allocated
 * char array (independent of the stream's reusable buffer).
 */
private ArrayList<char[]> tokenize( String input ) throws IOException {

  Log.debug( "tokenize '" + input + "'" );
  ArrayList<char[]> collected = new ArrayList<char[]>( );
  Tokenizer source = getTokenizerImpl( input );

  CharTermAttribute attr = source.addAttribute( CharTermAttribute.class );
  source.reset( );
  while ( source.incrementToken( ) ) {
    // toString() materializes exactly length() chars from the term buffer;
    // toCharArray() then yields an independent copy.
    collected.add( attr.toString( ).toCharArray( ) );
  }

  return collected;
}
 
开发者ID:lucidworks,项目名称:query-autofiltering-component,代码行数:18,代码来源:QueryAutoFilteringComponent.java

示例5: analyze

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Analyzes {@code text} and joins the resulting tokens with
 * {@code CcWordSet.WORD_SEPARATOR}, appending {@code CcWordSet.WORD_END}.
 * Returns {@code null} when analysis produces no tokens at all.
 */
private CharsRef analyze(Analyzer analyzer, String text) throws IOException {
	CharsRefBuilder joined = new CharsRefBuilder();
	try (TokenStream stream = analyzer.tokenStream("", text)) {
		CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
		// Registered to match the stream's attribute set even though it is not read here.
		PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
		stream.reset();
		while (stream.incrementToken()) {
			int len = term.length();
			if (len == 0) {
				throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
			}
			joined.grow(joined.length() + len + 1); /* current + word + separator */
			// Separator goes before every token except the first.
			if (joined.length() > 0) {
				joined.append(CcWordSet.WORD_SEPARATOR);
			}
			joined.append(term);
		}
		stream.end();
	}
	if (joined.length() == 0) {
		return null;
	}
	joined.append(CcWordSet.WORD_END);
	return joined.get();
}
 
开发者ID:thihy,项目名称:cc-analysis,代码行数:26,代码来源:CcWordsFilterTest.java

示例6: handleTokenStream

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Drains {@code tokenStream} and groups the produced tokens by absolute
 * position into {@code tokenPosMap} (position -> tokens at that position).
 *
 * @param tokenPosMap destination map, mutated in place
 * @param tokenStream stream to consume; always closed before returning
 * @throws IOException if the stream fails
 */
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    try {
        tokenStream.reset();
        int pos = 0;

        CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
        OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
        TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
        PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);

        // Without term text and offsets no token can be built; attributes do not
        // change mid-stream, so check once up front instead of on every iteration
        // (the original checked inside the loop and returned without closing).
        if (null == charTermAttribute || null == offsetAttribute) {
            return;
        }

        while (tokenStream.incrementToken()) {
            Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                    offsetAttribute.startOffset(), offsetAttribute.endOffset());
            if (null != typeAttribute) {
                token.setType(typeAttribute.type());
            }
            // Advance by the position increment when available, else by 1.
            pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
            if (!tokenPosMap.containsKey(pos)) {
                tokenPosMap.put(pos, new LinkedList<Token>());
            }
            tokenPosMap.get(pos).add(token);
        }
        tokenStream.end();
    } finally {
        // Original only closed on the happy path; ensure release on all paths.
        tokenStream.close();
    }
}
 
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:27,代码来源:MultiLangTokenizer.java

示例7: analyze

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/** Sugar: analyzes {@code text} with the analyzer and concatenates the
 *  resulting tokens into {@code reuse}, separated by
 *  {@link SynonymMap#WORD_SEPARATOR}.
 *  {@code reuse} and its chars must not be null.
 *  Throws {@link IllegalArgumentException} for zero-length tokens, tokens with
 *  a position increment other than 1, or text the analyzer eliminates entirely. */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        // Not the first token: write the separator at the current end, then
        // bump the logical length by one to account for it.
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      // Copy the token's chars into the buffer right after the separator (if any).
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:34,代码来源:SynonymMap.java

示例8: map

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Tokenizes the input value with the shared analyzer and emits the non-empty
 * terms as a {@link StringTuple} keyed by the document key.
 *
 * @param key     document identifier, reused as the analyzer field name
 * @param value   raw document text to tokenize
 * @param context Hadoop context receiving (key, tokenized document)
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
  TokenStream stream = analyzer.reusableTokenStream(key.toString(), new StringReader(value.toString()));
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  StringTuple document = new StringTuple();
  try {
    // Original never called reset(); the TokenStream contract requires
    // reset() before the first incrementToken() and end() after the last.
    stream.reset();
    while (stream.incrementToken()) {
      if (termAtt.length() > 0) {
        // Copy out of the reusable buffer before the next token overwrites it.
        document.add(new String(termAtt.buffer(), 0, termAtt.length()));
      }
    }
    stream.end();
  } finally {
    // Original leaked the stream on every call; release it on all paths.
    stream.close();
  }
  context.write(key, document);
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:14,代码来源:SequenceFileTokenizerMapper.java

示例9: setText

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Caches the given term attribute along with direct references to its
 * backing char buffer and current length for later use.
 */
void setText(final CharTermAttribute token) {
  // Assignments are independent; snapshot length and buffer off the attribute.
  this.length = token.length();
  this.buffer = token.buffer();
  this.token = token;
}
 
开发者ID:europeana,项目名称:search,代码行数:6,代码来源:ICUTransformFilter.java

示例10: toFormattedString

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; //导入方法依赖的package包/类
/**
 * Serializes the field to a JSON string: stored string/binary values plus,
 * when a token stream is present, one map per token carrying the term text
 * and any recognized attributes (flags, offsets, payload, posinc, type).
 * Unrecognized attributes are stored under their class name via toString().
 *
 * NOTE(review): the token stream is consumed without a prior reset() —
 * presumably the caller hands in a ready-to-iterate stream; confirm against
 * the TokenStream workflow contract.
 */
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    // Stored fields may carry a string value, a binary value, or both.
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      // Candidate term texts: from CharTermAttribute (cTerm) or from
      // TermToBytesRefAttribute (tTerm); cTerm wins if both are present.
      String cTerm = null;
      String tTerm = null;
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute catt = (CharTermAttribute)att;
          // Only the first length() chars of the reusable buffer are valid.
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          // Known attribute types map to well-known JSON keys; anything else
          // falls through to the generic class-name/toString() entry below.
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      // Prefer the char-based term; fall back to the bytes-based term.
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  // -1: no line-length limit when rendering the JSON.
  return JSONUtil.toJSON(map, -1);
}
 
开发者ID:europeana,项目名称:search,代码行数:70,代码来源:JsonPreAnalyzedParser.java


注:本文中的org.apache.lucene.analysis.tokenattributes.CharTermAttribute.length方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。