

Java TokenStream.incrementToken Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.incrementToken, drawn from open-source projects. If you have been wondering what TokenStream.incrementToken does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.


Below are 15 code examples of TokenStream.incrementToken, sorted by popularity by default.
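All 15 examples follow the same consumption protocol, so here is a minimal self-contained sketch of it first. This sketch is not taken from any of the projects below; the field name "body", the sample text, and the choice of StandardAnalyzer are illustrative assumptions. The contract is: obtain attributes before reset(), call reset() exactly once, loop on incrementToken() until it returns false, then call end() and finally close().

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class IncrementTokenDemo {
    public static void main(String[] args) throws IOException {
        // try-with-resources closes both the analyzer and the stream
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("body", "Lucene token streams in action")) {
            // Attributes are registered once, up front; each successful
            // incrementToken() call updates them in place.
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                    // mandatory before the first incrementToken()
            while (ts.incrementToken()) {  // returns false once the stream is exhausted
                System.out.println(termAtt.toString());
            }
            ts.end();                      // records end-of-stream state (e.g. final offset)
        }
    }
}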

Example 1: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
protected String lemmatize(String query) {
	ItalianAnalyzer analyzer = new ItalianAnalyzer();
	TokenStream tokenStream = analyzer.tokenStream("label", query);

	StringBuilder sb = new StringBuilder();
	CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
	try {
		tokenStream.reset();
		while (tokenStream.incrementToken()) {
			if (sb.length() > 0) {
				sb.append(" ");
			}
			sb.append(token.toString());
		}
		tokenStream.end();
		tokenStream.close();
	} catch (IOException e) {
		e.printStackTrace();
	}
	analyzer.close();

	return sb.toString();
}
 
Developer: teamdigitale, Project: ontonethub, Lines: 23, Source: AbstractIndexingJob.java

Example 2: splitByTokenizer

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws IOException{
  StringReader reader = new StringReader( source );
  TokenStream ts = loadTokenizer(tokFactory, reader);
  List<String> tokList = new ArrayList<>();
  try {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()){
      if( termAtt.length() > 0 )
        tokList.add( termAtt.toString() );
    }
  } finally{
    reader.close();
  }
  return tokList;
}
 
Developer: lamsfoundation, Project: lams, Lines: 17, Source: SlowSynonymFilterFactory.java

Example 3: getFilter

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  }
  catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index:" + ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
 
Developer: lamsfoundation, Project: lams, Lines: 26, Source: TermsFilterBuilder.java
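Note that the termAtt.getBytesRef() / termAtt.fillBytesRef() pairing used above (and again in Example 11) is the Lucene 4.x form of TermToBytesRefAttribute: getBytesRef() returns a reusable buffer once, and fillBytesRef() refreshes its contents on each iteration, which is why BytesRef.deepCopyOf is needed before storing a term. In later Lucene versions fillBytesRef() was removed and getBytesRef() is simply called inside the loop, so treat this detail as version-specific rather than current API.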

Example 4: assertOffsets

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
static private void assertOffsets(String inputStr, TokenStream tokenStream, List<String> expected) {
    try {
        List<String> termList = new ArrayList<String>();
        // CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
        while (tokenStream.incrementToken()) {
            int start = offsetAttr.startOffset();
            int end = offsetAttr.endOffset();
            termList.add(inputStr.substring(start, end));
        }
        System.out.println(String.join(" ", termList));
        assertThat(termList, is(expected));
    } catch (IOException e) {
        assertTrue(false);
    }
}
 
Developer: BuddhistDigitalResourceCenter, Project: lucene-bo, Lines: 17, Source: TibetanAnalyzerTest.java

Example 5: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
 *  because really the caller who called {@link Analyzer#tokenStream} should close it,
 *  but when trying that there are recursion issues when we try to use the same
 *  TokenStream twice in the same recursion... */
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
    int numTokens = 0;
    boolean success = false;
    try {
        stream.reset();
        consumer.reset(stream);
        while (stream.incrementToken()) {
            consumer.nextToken();
            numTokens++;
        }
        consumer.end();
        success = true; // without this, the success branch below is unreachable
    } finally {
        if (success) {
            stream.close();
        } else {
            IOUtils.closeWhileHandlingException(stream);
        }
    }
    return numTokens;
}
 
Developer: baidu, Project: Elasticsearch, Lines: 25, Source: SuggestUtils.java

Example 6: test

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Test
public void test() throws IOException {
    TokenStream ts = analyzer.tokenStream("field", new StringReader("大大大战争"));
    // Read tokens through CharTermAttribute; printing the TokenStream itself
    // would only show the stream object, not the current token.
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset(); // required before the first incrementToken() call
    while (ts.incrementToken()) {
        System.out.println("token : " + termAtt.toString());
    }
    ts.end();
    ts.close();
}
 
Developer: zhaoxi1988, Project: sjk, Lines: 8, Source: AnalyzerTest.java

Example 7: analysisResult

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Returns the tokenization result produced by the given analyzer
 *
 * @param analyzer the analyzer
 * @param keyWord  the keyword to analyze
 * @throws Exception
 */
private static List<String> analysisResult(Analyzer analyzer, String keyWord)
        throws Exception {
    TokenStream tokenStream = analyzer.tokenStream("content",
            new StringReader(keyWord));
    // Register the attribute once, before reset(); it is updated on each token.
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    List<String> stringList = new ArrayList<String>();
    tokenStream.reset(); // required before iterating, otherwise incrementToken() throws
    while (tokenStream.incrementToken()) {
        stringList.add(charTermAttribute.toString());
    }
    tokenStream.end();
    tokenStream.close();
    return stringList;
}
 
Developer: NeilRen, Project: NEILREN4J, Lines: 22, Source: IKAnalyzerService.java

Example 8: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 32, Source: TransportAnalyzeAction.java

Example 9: termsFromTokenStream

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();

    return outputTemp.toArray(new String[0]);
}
 
Developer: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines: 14, Source: SimilarityApiParser.java

Example 10: assertSimpleTSOutput

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static void assertSimpleTSOutput(TokenStream stream,
                                        String[] expected) throws IOException {
    stream.reset();
    CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
    assertThat(termAttr, notNullValue());
    int i = 0;
    while (stream.incrementToken()) {
        assertThat(expected.length, greaterThan(i));
        assertThat( "expected different term at index " + i, expected[i++], equalTo(termAttr.toString()));
    }
    assertThat("not all tokens produced", i, equalTo(expected.length));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 13, Source: KuromojiAnalysisTests.java

Example 11: getSpanQuery

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, value);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
  }
  catch (IOException ioe) {
    throw new ParserException("IOException parsing value:" + value);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
}
 
Developer: lamsfoundation, Project: lams, Lines: 30, Source: SpanOrTermsBuilder.java

Example 12: analyze

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Developer: baidu, Project: Elasticsearch, Lines: 31, Source: TransportAnalyzeAction.java

Example 13: lemmatize

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
public static String lemmatize(String query) {
	StringBuilder sb = new StringBuilder();

	ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_44);
	TokenStream tokenStream;
	try {
		tokenStream = analyzer.tokenStream("label", query);

		CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);

		tokenStream.reset();
		while (tokenStream.incrementToken()) {
			if (sb.length() > 0) {
				sb.append(" ");
			}
			sb.append(token.toString());
		}
		tokenStream.end();
		tokenStream.close();

		analyzer.close();
	} catch (IOException e) {
		log.error(e.getMessage(), e);
		sb = new StringBuilder();
		sb.append(query);
	}

	return sb.toString();
}
 
Developer: teamdigitale, Project: ontonethub, Lines: 29, Source: JerseyUtils.java

Example 14: analyzeSingleChunk

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
/**
 * Returns the analyzed form for the given chunk
 * 
 * If the analyzer produces more than one output token from the given chunk,
 * a ParseException is thrown.
 *
 * @param field The target field
 * @param termStr The full term from which the given chunk is excerpted
 * @param chunk The portion of the given termStr to be analyzed
 * @return The result of analyzing the given chunk
 * @throws ParseException when analysis returns other than one output token
 */
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
  String analyzed = null;
  TokenStream stream = null;
  try {
    stream = getAnalyzer().tokenStream(field, chunk);
    stream.reset();
    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    // get first and hopefully only output token
    if (stream.incrementToken()) {
      analyzed = termAtt.toString();
      
      // try to increment again, there should only be one output token
      StringBuilder multipleOutputs = null;
      while (stream.incrementToken()) {
        if (null == multipleOutputs) {
          multipleOutputs = new StringBuilder();
          multipleOutputs.append('"');
          multipleOutputs.append(analyzed);
          multipleOutputs.append('"');
        }
        multipleOutputs.append(',');
        multipleOutputs.append('"');
        multipleOutputs.append(termAtt.toString());
        multipleOutputs.append('"');
      }
      stream.end();
      if (null != multipleOutputs) {
        throw new ParseException(
            String.format(getLocale(),
                "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
      }
    } else {
      // nothing returned by analyzer.  Was it a stop word and the user accidentally
      // used an analyzer with stop words?
      stream.end();
      throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
    }
  } catch (IOException e){
    throw new ParseException(
        String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
  return analyzed;
}
 
Developer: lamsfoundation, Project: lams, Lines: 58, Source: AnalyzingQueryParser.java

Example 15: getQuery

import org.apache.lucene.analysis.TokenStream; // import the package/class this method depends on
@Override
public Query getQuery(Element e) throws ParserException {
  String fieldsList = e.getAttribute("fieldNames"); //a comma-delimited list of fields
  String fields[] = defaultFieldNames;
  if ((fieldsList != null) && (fieldsList.trim().length() > 0)) {
    fields = fieldsList.trim().split(",");
    //trim the fieldnames
    for (int i = 0; i < fields.length; i++) {
      fields[i] = fields[i].trim();
    }
  }

  //Parse any "stopWords" attribute
  //TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
  //I use all analyzers/fields to generate multi-field compatible stop list
  String stopWords = e.getAttribute("stopWords");
  Set<String> stopWordsSet = null;
  if ((stopWords != null) && (fields != null)) {
    stopWordsSet = new HashSet<>();
    for (String field : fields) {
      TokenStream ts = null;
      try {
        ts = analyzer.tokenStream(field, stopWords);
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          stopWordsSet.add(termAtt.toString());
        }
        ts.end();
      } catch (IOException ioe) {
        throw new ParserException("IoException parsing stop words list in "
            + getClass().getName() + ":" + ioe.getLocalizedMessage());
      } finally {
        IOUtils.closeWhileHandlingException(ts);
      }
    }
  }


  MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.getText(e), fields, analyzer, fields[0]);
  mlt.setMaxQueryTerms(DOMUtils.getAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS));
  mlt.setMinTermFrequency(DOMUtils.getAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY));
  mlt.setPercentTermsToMatch(DOMUtils.getAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100);
  mlt.setStopWords(stopWordsSet);
  int minDocFreq = DOMUtils.getAttribute(e, "minDocFreq", -1);
  if (minDocFreq >= 0) {
    mlt.setMinDocFreq(minDocFreq);
  }

  mlt.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));

  return mlt;
}
 
Developer: lamsfoundation, Project: lams, Lines: 54, Source: LikeThisQueryBuilder.java


Note: The org.apache.lucene.analysis.TokenStream.incrementToken examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers, and the copyright of each remains with its original author; consult the corresponding project's license before redistributing or reusing the code. Please do not repost without permission.