本文整理汇总了 C# 中 Lucene.Net.Index.IndexReader.DocFreq 方法的典型用法代码示例。如果您正苦于以下问题：C# IndexReader.DocFreq 方法的具体用法？C# IndexReader.DocFreq 怎么用？C# IndexReader.DocFreq 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Lucene.Net.Index.IndexReader 的用法示例。
在下文中一共展示了IndexReader.DocFreq方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: GetIdfWeightedTerms
/// <summary>
/// Extracts the term texts of a query as an array of WeightedTerms and scales each
/// term's weight by its inverse document frequency (IDF) in the given index.
/// </summary>
/// <param name="query">Query to extract term texts from.</param>
/// <param name="reader">Index used to compute IDF, which can be used to
/// a) score selected fragments better, or
/// b) render graded highlights, e.g. changing the intensity of the font color.</param>
/// <param name="fieldName">The field on which the IDF calculations are based.</param>
/// <returns>An array of the terms used in the query, together with their IDF-scaled weights.</returns>
public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, string fieldName)
{
    WeightedTerm[] weightedTerms = GetTerms(query, false, fieldName);
    int docCount = reader.NumDocs();

    for (int i = 0; i < weightedTerms.Length; i++)
    {
        WeightedTerm current = weightedTerms[i];
        try
        {
            // DocFreq also counts deleted documents, so cap it at the NumDocs() value.
            int cappedFreq = Math.Min(reader.DocFreq(new Term(fieldName, current.Term)), docCount);

            // IDF algorithm taken from the DefaultSimilarity class: ln(N / (df + 1)) + 1.
            double ratio = (float)docCount / (double)(cappedFreq + 1);
            current.Weight *= (float)(Math.Log(ratio) + 1.0);
        }
        catch (IOException)
        {
            // Deliberately ignored: if the lookup fails, the term keeps its unscaled weight.
        }
    }

    return weightedTerms;
}
示例2: ScoreHits
////////////////////////////////////////////////////////////////
/// <summary>
/// For every term in <paramref name="term_list"/>, adds a tf * idf contribution to the
/// score of each hit in <paramref name="hits_by_id"/> whose id appears in that term's postings.
/// </summary>
/// <param name="hits_by_id">Hits keyed by the document id returned from the reader's term docs; scores are updated in place.</param>
/// <param name="reader">Index reader that supplies document frequencies, the max doc count and the term postings.</param>
/// <param name="term_list">Collection of Term objects to score against.</param>
private static void ScoreHits (Dictionary<int, Hit> hits_by_id,
                               IndexReader reader,
                               ICollection term_list)
{
    LNS.Similarity similarity = LNS.Similarity.GetDefault ();
    TermDocs term_docs = reader.TermDocs ();

    // Guarantee the term enumerator is closed even if a lookup throws part-way through.
    try {
        foreach (Term term in term_list) {
            double idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());

            // Countdown of hits not yet matched for this term; lets the scan stop
            // early once every hit has been seen.
            int hit_count = hits_by_id.Count;

            term_docs.Seek (term);
            while (term_docs.Next () && hit_count > 0) {
                Hit hit;
                if (hits_by_id.TryGetValue (term_docs.Doc (), out hit)) {
                    double tf = similarity.Tf (term_docs.Freq ());
                    hit.Score += tf * idf;
                    --hit_count;
                }
            }
        }
    } finally {
        term_docs.Close ();
    }
}
示例3: DocFreq
/// <summary>
/// Returns the value of <c>IndexReader.DocFreq</c> for a term built from
/// <c>FIELD</c> and the given text.
/// </summary>
/// <param name="r">Reader to query.</param>
/// <param name="term">Term text to look up in <c>FIELD</c>.</param>
/// <returns>The document frequency reported by <paramref name="r"/>.</returns>
private int DocFreq(IndexReader r, string term)
{
    Term fieldTerm = new Term(FIELD, term);
    return r.DocFreq(fieldTerm);
}
示例4: SuggestSimilar
/// <summary>
/// Suggest similar words (optionally restricted to a field of an index).
///
/// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
/// is not the same as the edit distance strategy used to calculate the best
/// matching spell-checked word from the hits that Lucene found, one usually has
/// to retrieve a couple of numSug's in order to get the true best match.
///
/// </para>
/// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
/// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
///
/// </para>
/// </summary>
/// <param name="word"> the word you want a spell check done on </param>
/// <param name="numSug"> the number of suggested words </param>
/// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
/// <param name="field"> the field of the user index: if field is not null, the suggested
/// words are restricted to the words present in this field. </param>
/// <param name="suggestMode">
/// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
/// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
/// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception>
/// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
/// <returns> String[] the sorted list of the suggest words with these 2 criteria:
/// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
/// of the suggest words in the field of the user index
/// </returns>
public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
{
// obtainSearcher calls ensureOpen
IndexSearcher indexSearcher = ObtainSearcher();
try
{
if (ir == null || field == null)
{
suggestMode = SuggestMode.SUGGEST_ALWAYS;
}
if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
{
ir = null;
field = null;
}
int lengthWord = word.Length;
int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself
if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
{
return new string[] { word };
}
BooleanQuery query = new BooleanQuery();
string[] grams;
string key;
for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
{
key = "gram" + ng; // form key
grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
if (grams.Length == 0)
{
continue; // hmm
}
if (bStart > 0) // should we boost prefixes?
{
Add(query, "start" + ng, grams[0], bStart); // matches start of word
}
if (bEnd > 0) // should we boost suffixes
{
Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
}
for (int i = 0; i < grams.Length; i++)
{
Add(query, key, grams[i]);
}
}
int maxHits = 10 * numSug;
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
// System.out.println("HITS: " + hits.length());
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.Min(hits.Length, maxHits);
SuggestWord sugWord = new SuggestWord();
for (int i = 0; i < stop; i++)
{
[email protected] = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word
//.........这里部分代码省略.........