当前位置: 首页>>代码示例>>C#>>正文


C# Lucene.Net.Index.IndexReader.DocFreq方法代码示例

本文整理汇总了C#中Lucene.Net.Index.IndexReader.DocFreq方法的典型用法代码示例。如果您正苦于以下问题:C# Lucene.Net.Index.IndexReader.DocFreq方法的具体用法?C# Lucene.Net.Index.IndexReader.DocFreq怎么用?C# Lucene.Net.Index.IndexReader.DocFreq使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Lucene.Net.Index.IndexReader的用法示例。


在下文中一共展示了Lucene.Net.Index.IndexReader.DocFreq方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetIdfWeightedTerms

		/// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
		/// 
		/// </summary>
		/// <param name="query">     Query to extract term texts from
		/// </param>
		/// <param name="reader">used to compute IDF which can be used to a) score selected fragments better 
		/// b) use graded highlights eg chaning intensity of font color
		/// </param>
		/// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based
		/// </param>
		/// <returns> an array of the terms used in a query, plus their weights.
		/// </returns>
		public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, System.String fieldName)
		{
			WeightedTerm[] terms = GetTerms(query, false, fieldName);
			int totalNumDocs = reader.NumDocs();
			for (int i = 0; i < terms.Length; i++)
			{
				try
				{
					int docFreq = reader.DocFreq(new Term(fieldName, terms[i].term));
					//IDF algorithm taken from DefaultSimilarity class
					float idf = (float) (System.Math.Log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
					terms[i].weight *= idf;
				}
				catch (System.IO.IOException e)
				{
					//ignore 
				}
			}
			return terms;
		}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:32,代码来源:QueryTermExtractor.cs

示例2: Explain

			public virtual Explanation Explain(IndexReader reader, int doc)
			{
				
				Explanation result = new Explanation();
				result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");
				
				System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
				System.Text.StringBuilder query = new System.Text.StringBuilder();
				query.Append('\"');
				for (int i = 0; i < Enclosing_Instance.terms.Count; i++)
				{
					if (i != 0)
					{
						docFreqs.Append(" ");
						query.Append(" ");
					}
					
					Term term = (Term) Enclosing_Instance.terms[i];
					
					docFreqs.Append(term.Text());
					docFreqs.Append("=");
					docFreqs.Append(reader.DocFreq(term));
					
					query.Append(term.Text());
				}
				query.Append('\"');
				
				Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ": " + docFreqs + ")");
				
				// explain query weight
				Explanation queryExpl = new Explanation();
				queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
				
				Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
				if (Enclosing_Instance.GetBoost() != 1.0f)
					queryExpl.AddDetail(boostExpl);
				queryExpl.AddDetail(idfExpl);
				
				Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
				queryExpl.AddDetail(queryNormExpl);
				
				queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
				
				result.AddDetail(queryExpl);
				
				// explain field weight
				Explanation fieldExpl = new Explanation();
				fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:");
				
				Explanation tfExpl = Scorer(reader).Explain(doc);
				fieldExpl.AddDetail(tfExpl);
				fieldExpl.AddDetail(idfExpl);
				
				Explanation fieldNormExpl = new Explanation();
				byte[] fieldNorms = reader.Norms(Enclosing_Instance.field);
				float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
				fieldNormExpl.SetValue(fieldNorm);
				fieldNormExpl.SetDescription("fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")");
				fieldExpl.AddDetail(fieldNormExpl);
				
				fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
				
				result.AddDetail(fieldExpl);
				
				// combine them
				result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());
				
				if (queryExpl.GetValue() == 1.0f)
					return fieldExpl;
				
				return result;
			}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:72,代码来源:PhraseQuery.cs

示例3: Explain

        public virtual Explanation Explain(IndexReader reader, int doc)
        {
            Explanation result = new Explanation();
            result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");
            System.String field = ((SpanQuery) GetQuery()).GetField();

            System.Text.StringBuilder docFreqs = new System.Text.StringBuilder();
            System.Collections.IEnumerator i = terms.GetEnumerator();
            while (i.MoveNext())
            {
                System.Collections.DictionaryEntry tmp = (System.Collections.DictionaryEntry) i.Current;
                Term term = (Term) tmp.Key;
                docFreqs.Append(term.Text());
                docFreqs.Append("=");
                docFreqs.Append(reader.DocFreq(term));

                if (i.MoveNext())
                {
                    docFreqs.Append(" ");
                }
            }

            Explanation idfExpl = new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");

            // explain query weight
            Explanation queryExpl = new Explanation();
            queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");

            Explanation boostExpl = new Explanation(GetQuery().GetBoost(), "boost");
            if (GetQuery().GetBoost() != 1.0f)
                queryExpl.AddDetail(boostExpl);
            queryExpl.AddDetail(idfExpl);

            Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
            queryExpl.AddDetail(queryNormExpl);

            queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());

            result.AddDetail(queryExpl);

            // explain field weight
            Explanation fieldExpl = new Explanation();
            fieldExpl.SetDescription("fieldWeight(" + field + ":" + query.ToString(field) + " in " + doc + "), product of:");

            Explanation tfExpl = Scorer(reader).Explain(doc);
            fieldExpl.AddDetail(tfExpl);
            fieldExpl.AddDetail(idfExpl);

            Explanation fieldNormExpl = new Explanation();
            byte[] fieldNorms = reader.Norms(field);
            float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
            fieldNormExpl.SetValue(fieldNorm);
            fieldNormExpl.SetDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");
            fieldExpl.AddDetail(fieldNormExpl);

            fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());

            result.AddDetail(fieldExpl);

            // combine them
            result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());

            if (queryExpl.GetValue() == 1.0f)
                return fieldExpl;

            return result;
        }
开发者ID:vineelkovvuri,项目名称:ExtendableDesktopSearch,代码行数:67,代码来源:SpanWeight.cs

示例4: Rewrite

			public override Query Rewrite(IndexReader reader, MultiTermQuery query)
			{
				// Get the enum and start visiting terms.  If we
				// exhaust the enum before hitting either of the
				// cutoffs, we use ConstantBooleanQueryRewrite; else,
				// ConstantFilterRewrite:
				System.Collections.ArrayList pendingTerms = new System.Collections.ArrayList();
				int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc());
				int termCountLimit = System.Math.Min(BooleanQuery.GetMaxClauseCount(), termCountCutoff);
				int docVisitCount = 0;
				
				FilteredTermEnum enumerator = query.GetEnum(reader);
				try
				{
					while (true)
					{
						Term t = enumerator.Term();
						if (t != null)
						{
							pendingTerms.Add(t);
							// Loading the TermInfo from the terms dict here
							// should not be costly, because 1) the
							// query/filter will load the TermInfo when it
							// runs, and 2) the terms dict has a cache:
							docVisitCount += reader.DocFreq(t);
						}
						
						if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff)
						{
							// Too many terms -- make a filter.
							Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query));
							result.SetBoost(query.GetBoost());
							return result;
						}
						else if (!enumerator.Next())
						{
							// Enumeration is done, and we hit a small
							// enough number of terms & docs -- just make a
							// BooleanQuery, now
							System.Collections.IEnumerator it = pendingTerms.GetEnumerator();
							BooleanQuery bq = new BooleanQuery(true);
							while (it.MoveNext())
							{
								TermQuery tq = new TermQuery((Term) it.Current);
								bq.Add(tq, BooleanClause.Occur.SHOULD);
							}
							// Strip scores
							Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
							result.SetBoost(query.GetBoost());
							query.IncTotalNumberOfTerms(pendingTerms.Count);
							return result;
						}
					}
				}
				finally
				{
					enumerator.Close();
				}
			}
开发者ID:Inzaghi2012,项目名称:teamlab.v7.5,代码行数:59,代码来源:MultiTermQuery.cs

示例5: Explain

			public virtual Explanation Explain(IndexReader reader, int doc)
			{
				
				ComplexExplanation result = new ComplexExplanation();
				result.SetDescription("weight(" + GetQuery() + " in " + doc + "), product of:");
				
				Explanation idfExpl = new Explanation(idf, "idf(docFreq=" + reader.DocFreq(Enclosing_Instance.term) + ", numDocs=" + reader.NumDocs() + ")");
				
				// explain query weight
				Explanation queryExpl = new Explanation();
				queryExpl.SetDescription("queryWeight(" + GetQuery() + "), product of:");
				
				Explanation boostExpl = new Explanation(Enclosing_Instance.GetBoost(), "boost");
				if (Enclosing_Instance.GetBoost() != 1.0f)
					queryExpl.AddDetail(boostExpl);
				queryExpl.AddDetail(idfExpl);
				
				Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
				queryExpl.AddDetail(queryNormExpl);
				
				queryExpl.SetValue(boostExpl.GetValue() * idfExpl.GetValue() * queryNormExpl.GetValue());
				
				result.AddDetail(queryExpl);
				
				// explain field weight
				System.String field = Enclosing_Instance.term.Field();
				ComplexExplanation fieldExpl = new ComplexExplanation();
				fieldExpl.SetDescription("fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:");
				
				Explanation tfExpl = Scorer(reader).Explain(doc);
				fieldExpl.AddDetail(tfExpl);
				fieldExpl.AddDetail(idfExpl);
				
				Explanation fieldNormExpl = new Explanation();
				byte[] fieldNorms = reader.Norms(field);
				float fieldNorm = fieldNorms != null ? Similarity.DecodeNorm(fieldNorms[doc]) : 0.0f;
				fieldNormExpl.SetValue(fieldNorm);
				fieldNormExpl.SetDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");
				fieldExpl.AddDetail(fieldNormExpl);
				
				fieldExpl.SetMatch(tfExpl.IsMatch());
				fieldExpl.SetValue(tfExpl.GetValue() * idfExpl.GetValue() * fieldNormExpl.GetValue());
				
				result.AddDetail(fieldExpl);
				System.Boolean tempAux = fieldExpl.GetMatch();
				result.SetMatch(tempAux);
				
				// combine them
				result.SetValue(queryExpl.GetValue() * fieldExpl.GetValue());
				
				if (queryExpl.GetValue() == 1.0f)
					return fieldExpl;
				
				return result;
			}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:55,代码来源:TermQuery.cs

示例6: SuggestSimilar

        /// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
        /// <param name="word">String the word you want a spell check done on
        /// </param>
        /// <param name="numSug">int the number of suggest words
        /// </param>
        /// <param name="ir">the indexReader of the user index (can be null see field param)
        /// </param>
        /// <param name="field">String the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field.
        /// </param>
        /// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word
        /// (only if restricted mode = (indexReader!=null and field!=null)
        /// </param>
        /// <throws>  IOException </throws>
        /// <returns> String[] the sorted list of the suggest words with this 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int numSug, IndexReader ir, System.String field, bool morePopular)
        {
            // obtainSearcher calls ensureOpen
            IndexSearcher indexSearcher = ObtainSearcher();
            try
            {
                float min = this.minScore;
                int lengthWord = word.Length;

                int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
                // if the word exists in the real index and we don't care for word frequency, return the word itself
                if (!morePopular && freq > 0)
                {
                    return new String[] { word };
                }

                var query = new BooleanQuery();
                String[] grams;
                String key;

                var alreadySeen = new HashSet<string>();
                for (var ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {
                    key = "gram" + ng; // form key

                    grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                    if (grams.Length == 0)
                    {
                        continue; // hmm
                    }

                    if (bStart > 0)
                    { // should we boost prefixes?
                        Add(query, "start" + ng, grams[0], bStart); // matches start of word

                    }
                    if (bEnd > 0)
                    { // should we boost suffixes
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word

                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);

                // go thru more than 'maxr' matches in case the distance filter triggers
                int stop = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {
                    sugWord.termString = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word

                    // don't suggest a word for itself, that would be silly
                    if (sugWord.termString.Equals(word))
                    {
                        continue;
                    }

                    // edit distance
                    sugWord.score = sd.GetDistance(word, sugWord.termString);
                    if (sugWord.score < min)
                    {
                        continue;
                    }

                    if (ir != null && field != null)
                    { // use the user index
                        sugWord.freq = ir.DocFreq(new Term(field, sugWord.termString)); // freq in the index
                        // don't suggest a word that is not present in the field
                        if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1)
                        {
//.........这里部分代码省略.........
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:101,代码来源:SpellChecker.cs

示例7: SuggestSimilar

        /// <summary> Suggest similar words (restricted or not to a field of a user index)</summary>
        /// <param name="word">String the word you want a spell check done on
        /// </param>
        /// <param name="num_sug">int the number of suggest words
        /// </param>
        /// <param name="ir">the indexReader of the user index (can be null see field param)
        /// </param>
        /// <param name="field">String the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field.
        /// </param>
        /// <param name="morePopular">boolean return only the suggest words that are more frequent than the searched word
        /// (only if restricted mode = (indexReader!=null and field!=null)
        /// </param>
        /// <throws>  IOException </throws>
        /// <returns> String[] the sorted list of the suggest words with this 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        /// </returns>
        public virtual System.String[] SuggestSimilar(System.String word, int num_sug, IndexReader ir, System.String field, bool morePopular)
        {
            float min = this.minScore;
            TRStringDistance sd = new TRStringDistance(word);
            int lengthWord = word.Length;
			
            int goalFreq = (morePopular && ir != null) ? ir.DocFreq(new Term(field, word)) : 0;
            if (!morePopular && goalFreq > 0)
            {
                return new System.String[]{word}; // return the word if it exist in the index and i don't want a more popular word
            }
			
            BooleanQuery query = new BooleanQuery();
            System.String[] grams;
            System.String key;
			
            for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
            {
				
                key = "gram" + ng; // form key
				
                grams = FormGrams(word, ng); // form word into ngrams (allow dups too)
				
                if (grams.Length == 0)
                {
                    continue; // hmm
                }
				
                if (bStart > 0)
                {
                    // should we boost prefixes?
                    Add(query, "start" + ng, grams[0], bStart); // matches start of word
                }
                if (bEnd > 0)
                {
                    // should we boost suffixes
                    Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word
                }
                for (int i = 0; i < grams.Length; i++)
                {
                    Add(query, key, grams[i]);
                }
            }
			
            IndexSearcher searcher = new IndexSearcher(this.spellindex);
            Hits hits = searcher.Search(query);
            SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
			
            int stop = Math.Min(hits.Length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
            SuggestWord sugword = new SuggestWord();
            for (int i = 0; i < stop; i++)
            {
				
                sugword.string_Renamed = hits.Doc(i).Get(F_WORD); // get orig word)
				
                if (sugword.string_Renamed.Equals(word))
                {
                    continue; // don't suggest a word for itself, that would be silly
                }
				
                //edit distance/normalize with the min word length
                sugword.score = 1.0f - ((float) sd.GetDistance(sugword.string_Renamed) / System.Math.Min(sugword.string_Renamed.Length, lengthWord));
                if (sugword.score < min)
                {
                    continue;
                }
				
                if (ir != null)
                {
                    // use the user index
                    sugword.freq = ir.DocFreq(new Term(field, sugword.string_Renamed)); // freq in the index
                    if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1)
                    {
                        // don't suggest a word that is not present in the field
                        continue;
                    }
                }
                sugqueue.Insert(sugword);
                if (sugqueue.Size() == num_sug)
                {
                    //if queue full , maintain the min score
                    min = ((SuggestWord) sugqueue.Top()).score;
//.........这里部分代码省略.........
开发者ID:Rationalle,项目名称:ravendb,代码行数:101,代码来源:SpellChecker.cs


注:本文中的Lucene.Net.Index.IndexReader.DocFreq方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。