本文整理汇总了C#中Lucene.Net.Index.IndexReader.DocFreq方法的典型用法代码示例。如果您正苦于以下问题:C# Lucene.Net.Index.IndexReader.DocFreq方法的具体用法?C# Lucene.Net.Index.IndexReader.DocFreq怎么用?C# Lucene.Net.Index.IndexReader.DocFreq使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Lucene.Net.Index.IndexReader的用法示例。
在下文中一共展示了Lucene.Net.Index.IndexReader.DocFreq方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: GetIdfWeightedTerms
/// <summary>
/// Extracts the term texts of a given query into an array of weighted terms and
/// multiplies each term's weight by an inverse document frequency (IDF) factor
/// computed from the supplied index reader.
/// </summary>
/// <param name="query">Query to extract term texts from.</param>
/// <param name="reader">Used to compute IDF, which can be used to a) score selected fragments better
/// and b) use graded highlights, e.g. changing the intensity of the font color.</param>
/// <param name="fieldName">The field on which the IDF calculations are based.</param>
/// <returns>An array of the terms used in the query, plus their IDF-scaled weights.</returns>
public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, System.String fieldName)
{
    WeightedTerm[] terms = GetTerms(query, false, fieldName);
    int totalNumDocs = reader.NumDocs();
    for (int i = 0; i < terms.Length; i++)
    {
        try
        {
            int docFreq = reader.DocFreq(new Term(fieldName, terms[i].term));
            // IDF algorithm taken from DefaultSimilarity class
            float idf = (float) (System.Math.Log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
            terms[i].weight *= idf;
        }
        catch (System.IO.IOException)
        {
            // Deliberate best effort: if the document frequency cannot be read,
            // the term simply keeps its un-scaled weight and processing continues.
        }
    }
    return terms;
}
示例2: Explain
/// <summary>
/// Builds an explanation of this weight's score for one document as the product of
/// a query-weight part (boost, idf, queryNorm) and a field-weight part (tf, idf, fieldNorm).
/// </summary>
/// <param name="reader">Reader used for document frequencies, norms and the scorer.</param>
/// <param name="doc">Number of the document being explained.</param>
/// <returns>The combined explanation, or only the field-weight explanation when the
/// query-weight value equals 1.0.</returns>
public virtual Explanation Explain(IndexReader reader, int doc)
{
    Explanation explanation = new Explanation();
    explanation.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

    // Build "term=docFreq" pairs and the quoted phrase text side by side.
    System.Text.StringBuilder freqSummary = new System.Text.StringBuilder();
    System.Text.StringBuilder phraseText = new System.Text.StringBuilder();
    phraseText.Append('\"');
    for (int index = 0; index < Enclosing_Instance.terms.Count; index++)
    {
        string separator = index == 0 ? "" : " ";
        freqSummary.Append(separator);
        phraseText.Append(separator);

        Term current = (Term) Enclosing_Instance.terms[index];
        freqSummary.Append(current.Text()).Append("=").Append(reader.DocFreq(current));
        phraseText.Append(current.Text());
    }
    phraseText.Append('\"');

    Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + freqSummary + ")");

    // Query weight: boost (listed only when it differs from 1.0) * idf * queryNorm.
    Explanation queryWeightExpl = new Explanation();
    queryWeightExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
    Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
    bool hasNonDefaultBoost = Enclosing_Instance.GetBoost() != 1.0f;
    if (hasNonDefaultBoost)
    {
        queryWeightExpl.AddDetail(boostExpl);
    }
    queryWeightExpl.AddDetail(idfExpl);
    Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
    queryWeightExpl.AddDetail(queryNormExpl);
    queryWeightExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
    explanation.AddDetail(queryWeightExpl);

    // Field weight: tf * idf * fieldNorm.
    Explanation fieldWeightExpl = new Explanation();
    fieldWeightExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + phraseText + " in " + doc + "), product of:");
    Explanation tfExpl = Scorer(reader).Explain(doc);
    fieldWeightExpl.AddDetail(tfExpl);
    fieldWeightExpl.AddDetail(idfExpl);

    Explanation fieldNormExpl = new Explanation();
    byte[] norms = reader.Norms(Enclosing_Instance.field);
    float decodedNorm;
    if (norms != null)
    {
        decodedNorm = Similarity.DecodeNorm(norms[doc]);
    }
    else
    {
        // No norms available for the field: the norm factor is taken as zero.
        decodedNorm = 0.0f;
    }
    fieldNormExpl.SetValue(decodedNorm);
    fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
    fieldWeightExpl.AddDetail(fieldNormExpl);
    fieldWeightExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
    explanation.AddDetail(fieldWeightExpl);

    // Combine both parts; a query weight of exactly 1.0 adds nothing, so the
    // field-weight explanation is returned on its own in that case.
    explanation.SetValue(queryWeightExpl.GetValue() * fieldWeightExpl.GetValue());
    return queryWeightExpl.GetValue() == 1.0f ? fieldWeightExpl : explanation;
}
示例3: Explain
/// <summary>
/// Builds an explanation of this span weight's score for one document as the product of
/// a query-weight part (boost, idf, queryNorm) and a field-weight part (tf, idf, fieldNorm).
/// </summary>
/// <param name="reader">Reader used for document frequencies, norms and the scorer.</param>
/// <param name="doc">Number of the document being explained.</param>
/// <returns>The combined explanation, or only the field-weight explanation when the
/// query-weight value equals 1.0.</returns>
public virtual Explanation Explain(IndexReader reader, int doc)
{
    Explanation result = new Explanation();
    result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");
    System.String field = ((SpanQuery) GetQuery()).GetField();

    // Collect "term=docFreq" pairs separated by single spaces.
    // The separator is driven by a flag rather than a look-ahead MoveNext():
    // advancing the enumerator inside the loop body would consume an entry and
    // cause every other term to be skipped.
    System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
    bool firstTerm = true;
    System.Collections.IEnumerator i = terms.GetEnumerator();
    while (i.MoveNext())
    {
        if (!firstTerm)
        {
            docFreqs.Append(" ");
        }
        firstTerm = false;

        System.Collections.DictionaryEntry tmp = (System.Collections.DictionaryEntry) i.Current;
        Term term = (Term) tmp.Key;
        docFreqs.Append(term.Text());
        docFreqs.Append("=");
        docFreqs.Append(reader.DocFreq(term));
    }

    Explanation idfExpl = new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");

    // explain query weight: boost (listed only when it differs from 1.0) * idf * queryNorm
    Explanation queryExpl = new Explanation();
    queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
    Explanation boostExpl = new Explanation(GetQuery().GetBoost(), "boost");
    if (GetQuery().GetBoost() != 1.0f)
    {
        queryExpl.AddDetail(boostExpl);
    }
    queryExpl.AddDetail(idfExpl);
    Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
    queryExpl.AddDetail(queryNormExpl);
    queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
    result.AddDetail(queryExpl);

    // explain field weight: tf * idf * fieldNorm
    Explanation fieldExpl = new Explanation();
    fieldExpl.SetDescription("fieldWeight(" + field + ":" + query.ToString(field) + " in " + doc + "), product of:");
    Explanation tfExpl = Scorer(reader).Explain(doc);
    fieldExpl.AddDetail(tfExpl);
    fieldExpl.AddDetail(idfExpl);
    Explanation fieldNormExpl = new Explanation();
    byte[] fieldNorms = reader.Norms(field);
    // When the reader has no norms for the field, the norm factor is taken as zero.
    float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
    fieldNormExpl.SetValue(fieldNorm);
    fieldNormExpl.SetDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");
    fieldExpl.AddDetail(fieldNormExpl);
    fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
    result.AddDetail(fieldExpl);

    // combine them; a query weight of exactly 1.0 adds nothing, so only the
    // field-weight explanation is returned in that case
    result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());
    if (queryExpl.GetValue() == 1.0f)
    {
        return fieldExpl;
    }
    return result;
}
示例4: Rewrite
/// <summary>
/// Rewrites a multi-term query by walking its term enumeration. If the number of collected
/// terms or the accumulated document frequency reaches its cutoff, the query is wrapped in a
/// filter-based constant-score query; if the enumeration is exhausted first, a constant-score
/// query over a boolean query of the collected terms is returned instead.
/// </summary>
/// <param name="reader">Reader supplying the maximum document count and per-term document frequencies.</param>
/// <param name="query">The multi-term query to rewrite; its boost is copied to the result.</param>
/// <returns>The rewritten constant-score query.</returns>
public override Query Rewrite(IndexReader reader, MultiTermQuery query)
{
    System.Collections.ArrayList collectedTerms = new System.Collections.ArrayList();
    int maxDocVisits = (int) ((docCountPercent / 100.0) * reader.MaxDoc());
    int maxTerms = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff);
    int docsVisited = 0;

    FilteredTermEnum termEnum = query.GetEnum(reader);
    try
    {
        for (;;)
        {
            Term current = termEnum.Term();
            if (current != null)
            {
                collectedTerms.Add(current);
                // Looking up the doc freq here should not be costly: the query/filter
                // loads the same term info when it runs, and the terms dict has a cache.
                docsVisited += reader.DocFreq(current);
            }

            bool cutoffReached = collectedTerms.Count >= maxTerms || docsVisited >= maxDocVisits;
            if (cutoffReached)
            {
                // Too many terms or docs -- fall back to a filter.
                Query filterQuery = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
                filterQuery.SetBoost(query.GetBoost());
                return filterQuery;
            }

            if (termEnum.Next())
            {
                continue;
            }

            // Enumeration finished below both cutoffs: build a boolean query with one
            // SHOULD clause per collected term, then strip scores by wrapping it.
            BooleanQuery disjunction = new BooleanQuery(true);
            foreach (Term pending in collectedTerms)
            {
                disjunction.Add(new TermQuery(pending), BooleanClause.Occur.SHOULD);
            }
            Query constantQuery = new ConstantScoreQuery(new QueryWrapperFilter(disjunction));
            constantQuery.SetBoost(query.GetBoost());
            query.IncTotalNumberOfTerms(collectedTerms.Count);
            return constantQuery;
        }
    }
    finally
    {
        // The enumeration is closed on every exit path, including exceptions.
        termEnum.Close();
    }
}
示例5: Explain
/// <summary>
/// Builds an explanation of this term weight's score for one document as the product of
/// a query-weight part (boost, idf, queryNorm) and a field-weight part (tf, idf, fieldNorm),
/// propagating the match flag of the tf explanation to the result.
/// </summary>
/// <param name="reader">Reader used for document frequency, document count, norms and the scorer.</param>
/// <param name="doc">Number of the document being explained.</param>
/// <returns>The combined explanation, or only the field-weight explanation when the
/// query-weight value equals 1.0.</returns>
public virtual Explanation Explain(IndexReader reader, int doc)
{
    ComplexExplanation explanation = new ComplexExplanation();
    explanation.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");

    Explanation idfExpl = new Explanation(idf, "idf(docFreq=" + reader.DocFreq(Enclosing_Instance.term) + ", numDocs=" + reader.NumDocs() + ")");

    // Query weight: boost (listed only when it differs from 1.0) * idf * queryNorm.
    Explanation queryWeightExpl = new Explanation();
    queryWeightExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
    Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
    bool hasNonDefaultBoost = Enclosing_Instance.GetBoost() != 1.0f;
    if (hasNonDefaultBoost)
    {
        queryWeightExpl.AddDetail(boostExpl);
    }
    queryWeightExpl.AddDetail(idfExpl);
    Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
    queryWeightExpl.AddDetail(queryNormExpl);
    queryWeightExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
    explanation.AddDetail(queryWeightExpl);

    // Field weight: tf * idf * fieldNorm.
    System.String fieldName = Enclosing_Instance.term.Field();
    ComplexExplanation fieldWeightExpl = new ComplexExplanation();
    fieldWeightExpl.SetDescription("fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:");
    Explanation tfExpl = Scorer(reader).Explain(doc);
    fieldWeightExpl.AddDetail(tfExpl);
    fieldWeightExpl.AddDetail(idfExpl);

    Explanation fieldNormExpl = new Explanation();
    byte[] norms = reader.Norms(fieldName);
    float decodedNorm;
    if (norms != null)
    {
        decodedNorm = Similarity.DecodeNorm(norms[doc]);
    }
    else
    {
        // No norms available for the field: the norm factor is taken as zero.
        decodedNorm = 0.0f;
    }
    fieldNormExpl.SetValue(decodedNorm);
    fieldNormExpl.SetDescription("fieldNorm(field=" + fieldName + ", doc=" + doc + ")");
    fieldWeightExpl.AddDetail(fieldNormExpl);
    fieldWeightExpl.SetMatch(tfExpl.IsMatch());
    fieldWeightExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
    explanation.AddDetail(fieldWeightExpl);

    // The overall match flag mirrors the field-weight explanation's flag.
    System.Boolean fieldMatch = fieldWeightExpl.GetMatch();
    explanation.SetMatch(fieldMatch);

    // Combine both parts; a query weight of exactly 1.0 adds nothing, so the
    // field-weight explanation is returned on its own in that case.
    explanation.SetValue(queryWeightExpl.GetValue() * fieldWeightExpl.GetValue());
    return queryWeightExpl.GetValue() == 1.0f ? fieldWeightExpl : explanation;
}
示例6: SuggestSimilar
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
/// <param name="word">String the word you want a spell check done on
/// </param>
/// <param name="numSug">int the number of suggest words
/// </param>
/// <param name="ir">the indexReader of the user index (can be null see field param)
/// </param>
/// <param name="field">String the field of the user index: if field is not null, the suggested
/// words are restricted to the words present in this field.
/// </param>
/// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word
/// (only if restricted mode = (indexReader!=null and field!=null)
/// </param>
/// <throws> IOException </throws>
/// <returns> String[] the sorted list of the suggest words with this 2 criteria:
/// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
/// of the suggest words in the field of the user index
/// </returns>
public virtual System.String[] SuggestSimilar(System.String word, int numSug, IndexReader ir, System.String field, bool morePopular)
{
// obtainSearcher calls ensureOpen
IndexSearcher indexSearcher = ObtainSearcher();
try
{
float min = this.minScore;
int lengthWord = word.Length;
int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself
if (!morePopular && freq > 0)
{
return new String[] { word };
}
var query = new BooleanQuery();
String[] grams;
String key;
var alreadySeen = new HashSet<string>();
for (var ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
{
key = "gram" + ng; // form key
grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
if (grams.Length == 0)
{
continue; // hmm
}
if (bStart > 0)
{ // should we boost prefixes?
Add(query, "start" + ng, grams[0], bStart); // matches start of word
}
if (bEnd > 0)
{ // should we boost suffixes
Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
}
for (int i = 0; i < grams.Length; i++)
{
Add(query, key, grams[i]);
}
}
int maxHits = 10 * numSug;
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
// System.out.println("HITS: " + hits.length());
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.Min(hits.Length, maxHits);
SuggestWord sugWord = new SuggestWord();
for (int i = 0; i < stop; i++)
{
sugWord.termString = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word
// don't suggest a word for itself, that would be silly
if (sugWord.termString.Equals(word))
{
continue;
}
// edit distance
sugWord.score = sd.GetDistance(word, sugWord.termString);
if (sugWord.score < min)
{
continue;
}
if (ir != null && field != null)
{ // use the user index
sugWord.freq = ir.DocFreq(new Term(field, sugWord.termString)); // freq in the index
// don't suggest a word that is not present in the field
if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1)
{
//.........这里部分代码省略.........
示例7: SuggestSimilar
/// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
/// <param name="word">String the word you want a spell check done on
/// </param>
/// <param name="num_sug">int the number of suggest words
/// </param>
/// <param name="ir">the indexReader of the user index (can be null see field param)
/// </param>
/// <param name="field">String the field of the user index: if field is not null, the suggested
/// words are restricted to the words present in this field.
/// </param>
/// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word
/// (only if restricted mode = (indexReader!=null and field!=null)
/// </param>
/// <throws> IOException </throws>
/// <returns> String[] the sorted list of the suggest words with this 2 criteria:
/// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
/// of the suggest words in the field of the user index
/// </returns>
public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
{
float min = this.minScore;
TRStringDistance sd = new TRStringDistance(word);
int lengthWord = word.Length;
int goalFreq = (morePopular && ir != null) ? ir.DocFreq(new Term(field, word)) : 0;
if (!morePopular && goalFreq > 0)
{
return new System.String[]{word}; // return the word if it exist in the index and i don't want a more popular word
}
BooleanQuery query = new BooleanQuery();
System.String[] grams;
System.String key;
for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
{
key = "gram" + ng; // form key
grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
if (grams.Length == 0)
{
continue; // hmm
}
if (bStart > 0)
{
// should we boost prefixes?
Add(query, "start" + ng, grams[0], bStart); // matches start of word
}
if (bEnd > 0)
{
// should we boost suffixes
Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
}
for (int i = 0; i < grams.Length; i++)
{
Add(query, key, grams[i]);
}
}
IndexSearcher searcher = new IndexSearcher(this.spellindex);
Hits hits = searcher.Search(query);
SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
int stop = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
SuggestWord sugword = new SuggestWord();
for (int i = 0; i < stop; i++)
{
sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word)
if (sugword.string_Renamed.Equals(word))
{
continue; // don't suggest a word for itself, that would be silly
}
//edit distance/normalize with the min word length
sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
if (sugword.score < min)
{
continue;
}
if (ir != null)
{
// use the user index
sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
{
// don't suggest a word that is not present in the field
continue;
}
}
sugqueue.Insert(sugword);
if (sugqueue.Size() == num_sug)
{
//if queue full , maintain the min score
min = ((SuggestWord) sugqueue.Top()).score;
//.........这里部分代码省略.........