

Java TokenStream.end Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.analysis.TokenStream.end. If you are wondering what TokenStream.end does, how to call it, or what real-world usages look like, the examples selected below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.analysis.TokenStream.


Twelve code examples of the TokenStream.end method are shown below, ordered by popularity.
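All twelve examples follow the same TokenStream contract: call reset() before consuming, call incrementToken() in a loop, call end() once the loop returns false so the stream can record its end-of-stream state (notably the final offset of the input, which examples 6 and 8 rely on), and finally close() to release resources. The following minimal sketch illustrates that cycle; the StandardAnalyzer, field name, and sample text are placeholder assumptions, not taken from any example below.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class TokenStreamEndSketch {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) { // placeholder analyzer
            TokenStream ts = analyzer.tokenStream("field", "some sample text");
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            try {
                ts.reset();                   // 1. rewind to the start of the stream
                while (ts.incrementToken()) { // 2. consume every token
                    System.out.println(term + " [" + offset.startOffset()
                            + "," + offset.endOffset() + ")");
                }
                ts.end();                     // 3. record end-of-stream state (final offset)
            } finally {
                ts.close();                   // 4. release resources
            }
        }
    }
}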

Example 1: assertTokenStream
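A test helper from an Elasticsearch analysis plugin: it consumes a TokenStream and asserts each token's term text, type, and start/end offsets against the expected arrays, then calls end() once the stream is exhausted.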

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());

        if (expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }

        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);

        if (expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }

        if (expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }

        index++;
    }
    tokenStream.end();
}
 
Developer: open-korean-text, Project: elasticsearch-analysis-openkoreantext, Lines: 25, Source: TokenStreamAssertions.java

Example 2: assertCollation
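An ICU collation test helper: it verifies that two token streams each produce exactly one collation-key token and that the keys compare in the expected order, then ends and closes both streams.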

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
    CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

    stream1.reset();
    stream2.reset();

    assertThat(stream1.incrementToken(), equalTo(true));
    assertThat(stream2.incrementToken(), equalTo(true));
    assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
    assertThat(stream1.incrementToken(), equalTo(false));
    assertThat(stream2.incrementToken(), equalTo(false));

    stream1.end();
    stream2.end();

    stream1.close();
    stream2.close();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 20, Source: SimpleIcuCollationTokenFilterTests.java

Example 3: after
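A JUnit @After hook that re-tokenizes the test's text with the analyzer under test and prints the tokens to standard output, seven per line, ending and closing the stream when done.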

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
@After
public void after() {
    if (analyzer != null) {
        try {
            TokenStream ts = analyzer.tokenStream("field", text);
            CharTermAttribute ch = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            int i = 0;
            while (ts.incrementToken()) {
                i++;
                System.out.print(ch.toString() + "\t");
                if (i % 7 == 0) {
                    System.out.println();
                }
            }
            ts.end();
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
 
Developer: followwwind, Project: apache, Lines: 24, Source: AnalyzerTest.java

Example 4: getFilter
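From a Lucene XML query parser: it analyzes the element's text and collects the resulting terms, as deep-copied BytesRef values, into a TermsFilter for the given field.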

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  }
  catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index:" + ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
 
Developer: lamsfoundation, Project: lams, Lines: 26, Source: TermsFilterBuilder.java

Example 5: analyzeMultitermTerm
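QueryParserBase uses this to analyze one part of a multi-term query (for example a wildcard or range endpoint): the analyzer must emit exactly one token, after which the method calls end() and returns a deep copy of the token's bytes.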

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
  if (analyzerIn == null) analyzerIn = getAnalyzer();

  TokenStream source = null;
  try {
    source = analyzerIn.tokenStream(field, part);
    source.reset();
    
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();

    if (!source.incrementToken())
      throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
    termAtt.fillBytesRef();
    if (source.incrementToken())
      throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
    source.end();
    return BytesRef.deepCopyOf(bytes);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
  } finally {
    IOUtils.closeWhileHandlingException(source);
  }
}
 
Developer: lamsfoundation, Project: lams, Lines: 25, Source: QueryParserBase.java

Example 6: analyze
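From Elasticsearch's _analyze transport action: it collects every token's term, position, offsets, position length, and type into the response. Note how the final-offset state published by stream.end() is read back through offset.endOffset() to advance lastOffset across multiple values of the same field.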

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), posLen.getPositionLength(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 32, Source: TransportAnalyzeAction.java

Example 7: getSpanQuery
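Another Lucene XML query builder: each analyzed token becomes a SpanTermQuery clause, and the clauses are combined into a single SpanOrQuery.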

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);

  List<SpanQuery> clausesList = new ArrayList<>();

  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, value);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
  }
  catch (IOException ioe) {
    throw new ParserException("IOException parsing value:" + value);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
}
 
Developer: lamsfoundation, Project: lams, Lines: 30, Source: SpanOrTermsBuilder.java

Example 8: analyze
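An older variant of example 6 (without PositionLengthAttribute) following the same pattern: consume the stream, call end(), then fold the end-of-stream offset and position increment into the running totals.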

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);

        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
                lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));

        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();

        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);

    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
 
Developer: baidu, Project: Elasticsearch, Lines: 31, Source: TransportAnalyzeAction.java

Example 9: termsFromTokenStream
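A small utility that drains a TokenStream into a String array, calling end() and close() before returning.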

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
private String[] termsFromTokenStream(TokenStream stream) throws IOException {
    List<String> outputTemp = new ArrayList<>();
    CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        outputTemp.add(charTermAttribute.toString());
    }
    stream.end();
    stream.close();

    return outputTemp.toArray(new String[0]);
}
 
Developer: sebastian-hofstaetter, Project: ir-generalized-translation-models, Lines: 14, Source: SimilarityApiParser.java

Example 10: analyzeSingleChunk
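From Lucene's AnalyzingQueryParser: it analyzes one chunk of a wildcard term and insists on exactly one output token, throwing a ParseException (listing all tokens produced) when the analyzer returns zero or several.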

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
/**
 * Returns the analyzed form for the given chunk
 * 
 * If the analyzer produces more than one output token from the given chunk,
 * a ParseException is thrown.
 *
 * @param field The target field
 * @param termStr The full term from which the given chunk is excerpted
 * @param chunk The portion of the given termStr to be analyzed
 * @return The result of analyzing the given chunk
 * @throws ParseException when analysis returns other than one output token
 */
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException {
  String analyzed = null;
  TokenStream stream = null;
  try {
    stream = getAnalyzer().tokenStream(field, chunk);
    stream.reset();
    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
    // get first and hopefully only output token
    if (stream.incrementToken()) {
      analyzed = termAtt.toString();
      
      // try to increment again, there should only be one output token
      StringBuilder multipleOutputs = null;
      while (stream.incrementToken()) {
        if (null == multipleOutputs) {
          multipleOutputs = new StringBuilder();
          multipleOutputs.append('"');
          multipleOutputs.append(analyzed);
          multipleOutputs.append('"');
        }
        multipleOutputs.append(',');
        multipleOutputs.append('"');
        multipleOutputs.append(termAtt.toString());
        multipleOutputs.append('"');
      }
      stream.end();
      if (null != multipleOutputs) {
        throw new ParseException(
            String.format(getLocale(),
                "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
      }
    } else {
      // nothing returned by analyzer.  Was it a stop word and the user accidentally
      // used an analyzer with stop words?
      stream.end();
      throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
    }
  } catch (IOException e){
    throw new ParseException(
        String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
  return analyzed;
}
 
Developer: lamsfoundation, Project: lams, Lines: 58, Source: AnalyzingQueryParser.java

Example 11: getQuery
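A MoreLikeThis XML query builder: it parses a comma-delimited fieldNames attribute, then analyzes the optional stopWords attribute once per field so the resulting stop-word set works across all analyzers and fields, ending each stream after its loop.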

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
@Override
public Query getQuery(Element e) throws ParserException {
  String fieldsList = e.getAttribute("fieldNames"); //a comma-delimited list of fields
  String[] fields = defaultFieldNames;
  if ((fieldsList != null) && (fieldsList.trim().length() > 0)) {
    fields = fieldsList.trim().split(",");
    //trim the fieldnames
    for (int i = 0; i < fields.length; i++) {
      fields[i] = fields[i].trim();
    }
  }

  //Parse any "stopWords" attribute
  //TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
  //I use all analyzers/fields to generate multi-field compatible stop list
  String stopWords = e.getAttribute("stopWords");
  Set<String> stopWordsSet = null;
  if ((stopWords != null) && (fields != null)) {
    stopWordsSet = new HashSet<>();
    for (String field : fields) {
      TokenStream ts = null;
      try {
        ts = analyzer.tokenStream(field, stopWords);
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          stopWordsSet.add(termAtt.toString());
        }
        ts.end();
      } catch (IOException ioe) {
        throw new ParserException("IoException parsing stop words list in "
            + getClass().getName() + ":" + ioe.getLocalizedMessage());
      } finally {
        IOUtils.closeWhileHandlingException(ts);
      }
    }
  }


  MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.getText(e), fields, analyzer, fields[0]);
  mlt.setMaxQueryTerms(DOMUtils.getAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS));
  mlt.setMinTermFrequency(DOMUtils.getAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY));
  mlt.setPercentTermsToMatch(DOMUtils.getAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100);
  mlt.setStopWords(stopWordsSet);
  int minDocFreq = DOMUtils.getAttribute(e, "minDocFreq", -1);
  if (minDocFreq >= 0) {
    mlt.setMinDocFreq(minDocFreq);
  }

  mlt.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));

  return mlt;
}
 
Developer: lamsfoundation, Project: lams, Lines: 54, Source: LikeThisQueryBuilder.java

Example 12: addTermWeights
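From a relevancy-feedback (MoreLikeThis-style) implementation: it tokenizes text from a Reader and accumulates a weight per term, using a payload-decoded float as the weight on payload fields and 1.0 otherwise, while skipping noise words and capping the number of tokens parsed per field.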

import org.apache.lucene.analysis.TokenStream; // import the package/class the method depends on
/**
 * Adds term weights found by tokenizing text from reader into the Map words
 *
 * @param reader a source of text to be tokenized
 * @param termWeightMap a Map of terms and their weights
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName)
        throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException("To use RelevancyFeedback without " +
                "term vectors, you must provide an Analyzer");
    }

    TokenStream ts = analyzer.tokenStream(fieldName, reader);
    try {
        int tokenCount = 0;
        // for every token
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class);

        ts.reset();
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            if (tokenCount > maxNumTokensParsedPerField) {
                break;
            }
            if (word.trim().length() == 0) {
                continue;
            }
            if (isNoiseWord(word)) {
                continue;
            }

            BytesRef payload = payloadAttr.getPayload();
            float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field
            if (isPayloadField(fieldName) && payload != null) {
                tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
            }
            // increment frequency
            Flt termWeight = termWeightMap.get(word);
            if (termWeight == null) {
                termWeightMap.put(word, new Flt(tokenWeight));
            } else {
                termWeight.x += tokenWeight;
            }
        }
        ts.end();
    } finally {
        IOUtils.closeWhileHandlingException(ts);
    }
}
 
Developer: DiceTechJobs, Project: RelevancyFeedback, Lines: 54, Source: RelevancyFeedback.java


Note: The org.apache.lucene.analysis.TokenStream.end examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.