当前位置: 首页>>代码示例>>C#>>正文


C# IndexReader.DocFreq方法代码示例

本文整理汇总了C#中Lucene.Net.Index.IndexReader.DocFreq方法的典型用法代码示例。如果您正苦于以下问题:C# IndexReader.DocFreq方法的具体用法?C# IndexReader.DocFreq怎么用?C# IndexReader.DocFreq使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Lucene.Net.Index.IndexReader的用法示例。


在下文中一共展示了IndexReader.DocFreq方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: GetIdfWeightedTerms

 /// <summary> Extracts all terms texts of a given Query into an array of WeightedTerms
 /// 
 /// </summary>
 /// <param name="query">Query to extract term texts from</param>
 /// <param name="reader">used to compute IDF which can be used to a) score selected fragments better 
 /// b) use graded highlights eg chaning intensity of font color</param>
 /// <param name="fieldName">the field on which Inverse Document Frequency (IDF) calculations are based</param>
 /// <returns> an array of the terms used in a query, plus their weights.</returns>
 public static WeightedTerm[] GetIdfWeightedTerms(Query query, IndexReader reader, string fieldName)
 {
     WeightedTerm[] terms = GetTerms(query, false, fieldName);
     int totalNumDocs = reader.NumDocs();
     foreach (WeightedTerm t in terms)
     {
         try
         {
             int docFreq = reader.DocFreq(new Term(fieldName, t.Term));
             // docFreq counts deletes
             if (totalNumDocs < docFreq)
             {
                 docFreq = totalNumDocs;
             }
             //IDF algorithm taken from DefaultSimilarity class
             var idf = (float)(Math.Log((float)totalNumDocs / (double)(docFreq + 1)) + 1.0);
             t.Weight *= idf;
         }
         catch (IOException e)
         {
             //ignore
         }
     }
     return terms;
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:33,代码来源:QueryTermExtractor.cs

示例2: ScoreHits

		////////////////////////////////////////////////////////////////

		static private void ScoreHits (Dictionary<int, Hit>   hits_by_id,
					       IndexReader reader,
					       ICollection term_list)
		{
			LNS.Similarity similarity;
			similarity = LNS.Similarity.GetDefault ();

			TermDocs term_docs = reader.TermDocs ();
			Hit hit;

			foreach (Term term in term_list) {

				double idf;
				idf = similarity.Idf (reader.DocFreq (term), reader.MaxDoc ());

				int hit_count;
				hit_count = hits_by_id.Count;

				term_docs.Seek (term);
				while (term_docs.Next () && hit_count > 0) {
					
					int id;
					id = term_docs.Doc ();

					if (hits_by_id.TryGetValue (id, out hit)) {
						double tf;
						tf = similarity.Tf (term_docs.Freq ());
						hit.Score += tf * idf;
						--hit_count;
					}
				}
			}

			term_docs.Close ();
		}
开发者ID:ArsenShnurkov,项目名称:beagle-1,代码行数:37,代码来源:LuceneQueryingDriver.cs

示例3: DocFreq

 private int DocFreq(IndexReader r, string term)
 {
     return r.DocFreq(new Term(FIELD, term));
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:4,代码来源:TestTermsEnum.cs

示例4: SuggestSimilar

        /// <summary>
        /// Suggest similar words (optionally restricted to a field of an index).
        /// 
        /// <para>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
        /// is not the same as the edit distance strategy used to calculate the best
        /// matching spell-checked word from the hits that Lucene found, one usually has
        /// to retrieve a couple of numSug's in order to get the true best match.
        /// 
        /// </para>
        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
        /// 
        /// </para>
        /// </summary>
        /// <param name="word"> the word you want a spell check done on </param>
        /// <param name="numSug"> the number of suggested words </param>
        /// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
        /// <param name="field"> the field of the user index: if field is not null, the suggested
        /// words are restricted to the words present in this field. </param>
        /// <param name="suggestMode"> 
        /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
        /// <exception cref="IOException"> if the underlying index throws an <seealso cref="IOException"/> </exception>
        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
        /// <returns> String[] the sorted list of the suggest words with these 2 criteria:
        /// first criteria: the edit distance, second criteria (only if restricted mode): the popularity
        /// of the suggest words in the field of the user index
        ///  </returns>
        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
        {
            // obtainSearcher calls ensureOpen
            IndexSearcher indexSearcher = ObtainSearcher();
            try
            {
                if (ir == null || field == null)
                {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }
                if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
                {
                    ir = null;
                    field = null;
                }

                int lengthWord = word.Length;

                int freq = (ir != null && field != null) ? ir.DocFreq(new Term(field, word)) : 0;
                int goalFreq = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
                // if the word exists in the real index and we don't care for word frequency, return the word itself
                if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
                {
                    return new string[] { word };
                }

                BooleanQuery query = new BooleanQuery();
                string[] grams;
                string key;

                for (int ng = GetMin(lengthWord); ng <= GetMax(lengthWord); ng++)
                {

                    key = "gram" + ng; // form key

                    grams = FormGrams(word, ng); // form word into ngrams (allow dups too)

                    if (grams.Length == 0)
                    {
                        continue; // hmm
                    }

                    if (bStart > 0) // should we boost prefixes?
                    {
                        Add(query, "start" + ng, grams[0], bStart); // matches start of word

                    }
                    if (bEnd > 0) // should we boost suffixes
                    {
                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd); // matches end of word

                    }
                    for (int i = 0; i < grams.Length; i++)
                    {
                        Add(query, key, grams[i]);
                    }
                }

                int maxHits = 10 * numSug;

                //    System.out.println("Q: " + query);
                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
                //    System.out.println("HITS: " + hits.length());
                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);

                // go thru more than 'maxr' matches in case the distance filter triggers
                int stop = Math.Min(hits.Length, maxHits);
                SuggestWord sugWord = new SuggestWord();
                for (int i = 0; i < stop; i++)
                {

                    [email protected] = indexSearcher.Doc(hits[i].Doc).Get(F_WORD); // get orig word
//.........这里部分代码省略.........
开发者ID:WakeflyCBass,项目名称:lucenenet,代码行数:101,代码来源:SpellChecker.cs


注:本文中的Lucene.Net.Index.IndexReader.DocFreq方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。