当前位置: 首页>>代码示例>>Java>>正文


Java CharsRefBuilder.toString方法代码示例

本文整理汇总了Java中org.apache.lucene.util.CharsRefBuilder.toString方法的典型用法代码示例。如果您正苦于以下问题:Java CharsRefBuilder.toString方法的具体用法?Java CharsRefBuilder.toString怎么用?Java CharsRefBuilder.toString使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.util.CharsRefBuilder的用法示例。


在下文中一共展示了CharsRefBuilder.toString方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Accumulates the frequencies of all terms stored in the given term vector
 * into {@code termFreqMap}, skipping terms classified as noise words.
 *
 * @param termFreqMap
 *          map from term text to its accumulated frequency
 * @param vector
 *          term vector holding the terms and frequencies of one doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException
{
  final TermsEnum iterator = vector.iterator();
  final CharsRefBuilder charBuffer = new CharsRefBuilder();
  for (BytesRef bytes = iterator.next(); bytes != null; bytes = iterator.next()) {
    // decode the UTF-8 term bytes into a reusable char buffer
    charBuffer.copyUTF8Bytes(bytes);
    final String word = charBuffer.toString();
    if (isNoiseWord(word)) {
      continue;
    }
    final int occurrences = (int) iterator.totalTermFreq();

    // accumulate into the existing counter, or start a new one
    Int tally = termFreqMap.get(word);
    if (tally != null) {
      tally.x += occurrences;
    } else {
      tally = new Int();
      tally.x = occurrences;
      termFreqMap.put(word, tally);
    }
  }
}
 
开发者ID:oeuvres,项目名称:Alix,代码行数:34,代码来源:MoreLikeThis.java

示例2: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Adds every non-noise term of the given term vector to {@code termFreqMap},
 * summing frequencies for terms that are already present.
 *
 * @param termFreqMap target map from term text to accumulated frequency
 * @param vector term vector listing the terms of a single doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum te = vector.iterator(null);
  final CharsRefBuilder utf16 = new CharsRefBuilder();
  BytesRef bytes = te.next();
  while (bytes != null) {
    // convert the raw UTF-8 term into a String via the scratch builder
    utf16.copyUTF8Bytes(bytes);
    final String token = utf16.toString();
    if (!isNoiseWord(token)) {
      final int occurrences = (int) te.totalTermFreq();
      final Int existing = termFreqMap.get(token);
      if (existing == null) {
        final Int fresh = new Int();
        fresh.x = occurrences;
        termFreqMap.put(token, fresh);
      } else {
        existing.x += occurrences;
      }
    }
    bytes = te.next();
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:30,代码来源:MoreLikeThis.java

示例3: innerExecute

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Runs a completion suggestion against the index and converts the collected
 * top documents into a {@link CompletionSuggestion} result.
 *
 * @param name the suggestion name reported back to the client
 * @param suggestionContext parsed completion suggestion parameters
 * @param searcher searcher over the suggest index
 * @param spare scratch buffer used to decode the suggest text
 * @return the populated suggestion, or {@code null} when no completion field
 *         type is configured for this context
 * @throws IOException if the underlying suggest lookup fails
 */
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
        final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    // Fetch the field type once instead of calling the getter twice
    // (the original checked getFieldType() != null and then re-read it).
    final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
    if (fieldType != null) {
        CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
        spare.copyUTF8Bytes(suggestionContext.getText());
        CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
            new Text(spare.toString()), 0, spare.length());
        completionSuggestion.addTerm(completionSuggestEntry);
        TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
        suggest(searcher, suggestionContext.toQuery(), collector);
        int numResult = 0;
        for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
            TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
            // collect contexts
            Map<String, Set<CharSequence>> contexts = Collections.emptyMap();
            if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
                contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
            }
            // cap the number of returned options at the requested size
            if (numResult++ < suggestionContext.getSize()) {
                CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(suggestDoc.doc,
                    new Text(suggestDoc.key.toString()), suggestDoc.score, contexts);
                completionSuggestEntry.addOption(option);
            } else {
                break;
            }
        }
        return completionSuggestion;
    }
    return null;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:33,代码来源:CompletionSuggester.java

示例4: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (BytesRef raw = it.next(); raw != null; raw = it.next()) {
        scratch.copyUTF8Bytes(raw);
        final String token = scratch.toString();
        // ignore noise words, then per-field skip terms (same order as before)
        if (isNoiseWord(token) || isSkipTerm(fieldName, token)) {
            continue;
        }

        // sum this term's within-document frequency over all docs in the postings
        int total = 0;
        final PostingsEnum postings = it.postings(null);
        if (postings != null) {
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                total += postings.freq();
            }
        }

        // increment frequency
        Int entry = termFreqMap.get(token);
        if (entry == null) {
            entry = new Int();
            entry.x = total;
            termFreqMap.put(token, entry);
        } else {
            entry.x += total;
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:39,代码来源:XMoreLikeThis.java

示例5: toUtf8

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
@Override
public String toUtf8() {
    // short-circuit the empty case before materializing any bytes
    if (length() != 0) {
        final CharsRefBuilder chars = new CharsRefBuilder();
        chars.copyUTF8Bytes(toBytes(), offset, length);
        return chars.toString();
    }
    return "";
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:12,代码来源:PagedBytesReference.java

示例6: addTermFrequencies

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum termIterator = vector.iterator();
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    BytesRef current;
    while ((current = termIterator.next()) != null) {
        charsRef.copyUTF8Bytes(current);
        final String termText = charsRef.toString();
        if (isNoiseWord(termText)) {
            continue;
        }
        if (isSkipTerm(fieldName, termText)) {
            continue;
        }

        // walk the postings to total this term's frequency across docs
        final PostingsEnum postings = termIterator.postings(null);
        int frequency = 0;
        while (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            frequency += postings.freq();
        }

        // increment frequency
        final Int previous = termFreqMap.get(termText);
        if (previous != null) {
            previous.x += frequency;
        } else {
            final Int created = new Int();
            created.x = frequency;
            termFreqMap.put(termText, created);
        }
    }
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:39,代码来源:XMoreLikeThis.java

示例7: readTokensFromVector

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Counts how many times each term appears in the given term vector.
 *
 * @param vector term vector to read tokens from
 * @return map from term text to its occurrence count within the vector
 */
private Map<String,Integer> readTokensFromVector(Terms vector) throws IOException {
    final Map<String,Integer> counts = new HashMap<String,Integer>();
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder chars = new CharsRefBuilder();
    for (BytesRef raw = it.next(); raw != null; raw = it.next()) {
        chars.copyUTF8Bytes(raw);
        final String token = chars.toString();
        final Integer prior = counts.get(token);
        counts.put(token, prior == null ? 1 : prior + 1);
    }
    return counts;
}
 
开发者ID:DiceTechJobs,项目名称:SolrPlugins,代码行数:14,代码来源:NaiveBayesComponent.java

示例8: getTermsFromTermVectorField

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Extracts all terms of a term-vector field as plain strings, in the
 * enumeration order of the underlying {@code TermsEnum}.
 *
 * @param vector term vector to read from
 * @return list of decoded term strings (may contain no duplicates removed)
 */
public static List<String> getTermsFromTermVectorField(Terms vector) throws IOException {
    final ArrayList<String> result = new ArrayList<String>();
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder chars = new CharsRefBuilder();
    BytesRef raw;
    while ((raw = it.next()) != null) {
        // decode UTF-8 term bytes and collect the string form
        chars.copyUTF8Bytes(raw);
        result.add(chars.toString());
    }
    return result;
}
 
开发者ID:DiceTechJobs,项目名称:SolrPlugins,代码行数:15,代码来源:TermExtractionHelper.java

示例9: buildResultEntry

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Builds a phrase-suggestion entry for the suggestion's original text,
 * carrying the given cutoff score.
 */
private static PhraseSuggestion.Entry buildResultEntry(SuggestionContext suggestion, CharsRefBuilder spare, double cutoffScore) {
    spare.copyUTF8Bytes(suggestion.getText());
    final Text label = new Text(spare.toString());
    return new PhraseSuggestion.Entry(label, 0, spare.length(), cutoffScore);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:5,代码来源:PhraseSuggester.java

示例10: buildResultEntry

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Creates the result entry for a phrase suggestion, labelled with the
 * suggestion's source text and tagged with the cutoff score.
 */
private PhraseSuggestion.Entry buildResultEntry(PhraseSuggestionContext suggestion, CharsRefBuilder spare, double cutoffScore) {
    spare.copyUTF8Bytes(suggestion.getText());
    final int textLength = spare.length();
    return new PhraseSuggestion.Entry(new Text(spare.toString()), 0, textLength, cutoffScore);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:5,代码来源:PhraseSuggester.java

示例11: suggestSimilar

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
/**
 * Provide spelling corrections based on several parameters.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq The minimum document frequency a potential suggestion need to have in order to be included
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order.
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRefBuilder spare) throws IOException {

  // maxBoostAtt lets us feed the current competitiveness threshold back
  // into the fuzzy enumeration so it can skip hopeless candidates early.
  AttributeSource atts = new AttributeSource();
  MaxNonCompetitiveBoostAttribute maxBoostAtt =
    atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
  Terms terms = MultiFields.getTerms(ir, term.field());
  if (terms == null) {
    return Collections.emptyList();
  }
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
  // min-queue of the best candidates seen so far; head is the weakest entry
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();

  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  ScoreTerm st = new ScoreTerm();
  BoostAttribute boostAtt =
    e.attributes().addAttribute(BoostAttribute.class);
  while ((candidateTerm = e.next()) != null) {
    final float boost = boostAtt.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
      continue;

    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm))
      continue;

    int df = e.docFreq();

    // check docFreq if required
    if (df <= docfreq)
      continue;

    final float score;
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      termAsString = null;
      // undo FuzzyTermsEnum's scale factor for a real scaled lev score
      score = boost / e.getScaleFactor() + e.getMinSimilarity();
    } else {
      // decode the candidate to a String and score it with the configured
      // string-distance implementation
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }

    if (score < accuracy)
      continue;

    // add new entry in PQ; deep-copy the term bytes because the enum
    // reuses its BytesRef on the next iteration
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = boost;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue; the evicted ScoreTerm object is
    // recycled as the scratch entry for the next candidate
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }

  return stQueue;
}
 
开发者ID:europeana,项目名称:search,代码行数:78,代码来源:DirectSpellChecker.java

示例12: getDetailedFieldInfo

import org.apache.lucene.util.CharsRefBuilder; //导入方法依赖的package包/类
@SuppressWarnings("unchecked")
// Collects per-field term statistics for the Luke request handler: the
// number of distinct terms, the top-N terms by document frequency, and a
// log2-bucketed histogram of document frequencies. Results are written
// into fieldMap; returns silently when the field has no indexed terms.
private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
    throws IOException {

  SolrParams params = req.getParams();
  final int numTerms = params.getInt( NUMTERMS, DEFAULT_COUNT );

  TopTermQueue tiq = new TopTermQueue(numTerms + 1);  // Something to collect the top N terms in.

  final CharsRefBuilder spare = new CharsRefBuilder();

  Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());

  if (fields == null) { // No indexed fields
    return;
  }

  Terms terms = fields.terms(field);
  if (terms == null) {  // No terms in the field.
    return;
  }
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef text;
  int[] buckets = new int[HIST_ARRAY_SIZE];
  while ((text = termsEnum.next()) != null) {
    ++tiq.distinctTerms;
    int freq = termsEnum.docFreq();  // This calculation seems odd, but it gives the same results as it used to.
    // bucket index is ceil(log2(freq)): freq 1 -> slot 0, 2 -> 1, 3-4 -> 2, ...
    int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
    buckets[slot] = buckets[slot] + 1;
    // only decode the term text when it could make the top-N queue
    if (numTerms > 0 && freq > tiq.minFreq) {
      spare.copyUTF8Bytes(text);
      String t = spare.toString();

      tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
      if (tiq.size() > numTerms) { // if tiq full
        tiq.pop(); // remove lowest in tiq
        tiq.minFreq = tiq.getTopTermInfo().docFreq;
      }
    }
  }
  tiq.histogram.add(buckets);
  fieldMap.add("distinct", tiq.distinctTerms);

  // Include top terms
  fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

  // Add a histogram
  fieldMap.add("histogram", tiq.histogram.toNamedList());
}
 
开发者ID:europeana,项目名称:search,代码行数:50,代码来源:LukeRequestHandler.java


注:本文中的org.apache.lucene.util.CharsRefBuilder.toString方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。