当前位置: 首页>>代码示例>>C#>>正文


C# Lucene.Net.Analysis.Analyzer.TokenStream方法代码示例

本文整理汇总了C#中Lucene.Net.Analysis.Analyzer.TokenStream方法的典型用法代码示例。如果您正苦于以下问题:C# Lucene.Net.Analysis.Analyzer.TokenStream方法的具体用法?C# Lucene.Net.Analysis.Analyzer.TokenStream怎么用?C# Lucene.Net.Analysis.Analyzer.TokenStream使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Lucene.Net.Analysis.Analyzer的用法示例。


在下文中一共展示了Lucene.Net.Analysis.Analyzer.TokenStream方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: QueryTermVector

		/// <summary>
		/// Builds a term vector from <paramref name="queryString"/> by running it through
		/// <paramref name="analyzer"/> and collecting every token produced.
		/// A null analyzer (or a null stream from it) yields an empty vector.
		/// </summary>
		/// <param name="queryString">raw query text to tokenize</param>
		/// <param name="analyzer">analyzer used to split the query into terms; may be null</param>
		public QueryTermVector(System.String queryString, Analyzer analyzer)
		{
			if (analyzer != null)
			{
				TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
				if (stream != null)
				{
					System.Collections.ArrayList terms = new System.Collections.ArrayList();
					try
					{
						stream.Reset();
						TermAttribute termAtt = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));
						
						// Drain the stream, recording the text of each term.
						while (stream.IncrementToken())
						{
							terms.Add(termAtt.Term());
						}
						ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
					}
					catch (System.IO.IOException)
					{
						// Best-effort: an analysis failure leaves the vector empty.
						// (Was "catch (IOException e)" with an unused variable — CS0168.)
					}
				}
			}
		}
开发者ID:Inzaghi2012,项目名称:teamlab.v7.5,代码行数:29,代码来源:QueryTermVector.cs

示例2: QueryTermVector

 /// <summary>
 /// Tokenizes <paramref name="queryString"/> with <paramref name="analyzer"/> and feeds
 /// every produced term into ProcessTerms. Does nothing when the analyzer (or the
 /// stream it returns) is null; IO failures during analysis are silently ignored.
 /// </summary>
 /// <param name="queryString">raw query text to tokenize</param>
 /// <param name="analyzer">analyzer used to split the query into terms; may be null</param>
 public QueryTermVector(System.String queryString, Analyzer analyzer)
 {
     if (analyzer == null)
     {
         return;
     }
     TokenStream tokenSource = analyzer.TokenStream("", new System.IO.StringReader(queryString));
     if (tokenSource == null)
     {
         return;
     }
     IList<string> collected = new List<string>();
     try
     {
         tokenSource.Reset();
         ITermAttribute termAttribute = tokenSource.AddAttribute<ITermAttribute>();
         while (tokenSource.IncrementToken())
         {
             collected.Add(termAttribute.Term);
         }
         ProcessTerms(collected.ToArray());
     }
     catch (System.IO.IOException)
     {
         // analysis failure: leave the vector empty
     }
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:29,代码来源:QueryTermVector.cs

示例3: QueryTermVector

		/// <summary>
		/// Builds the term vector by pulling tokens from the analyzer's stream one at a
		/// time (legacy Token API) until it is exhausted. IO errors abort silently,
		/// leaving whatever terms were collected unprocessed.
		/// </summary>
		/// <param name="queryString">raw query text to tokenize</param>
		/// <param name="analyzer">analyzer used to split the query into terms; may be null</param>
		public QueryTermVector(System.String queryString, Analyzer analyzer)
		{
			if (analyzer == null)
				return;
			TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
			if (stream == null)
				return;
			System.Collections.ArrayList collected = new System.Collections.ArrayList();
			try
			{
				// Next() returns null once the stream is exhausted.
				for (Token current = stream.Next(); current != null; current = stream.Next())
				{
					collected.Add(current.TermText());
				}
				ProcessTerms((System.String[]) collected.ToArray(typeof(System.String)));
			}
			catch (System.IO.IOException)
			{
			}
		}
开发者ID:zweib730,项目名称:beagrep,代码行数:23,代码来源:QueryTermVector.cs

示例4: QueryTermVector

 /// <summary>
 /// Builds the term vector using the reusable-token API: one Token instance is
 /// recycled across calls to Next to avoid per-token allocation. IO errors abort
 /// silently, leaving the collected terms unprocessed.
 /// </summary>
 /// <param name="queryString">raw query text to tokenize</param>
 /// <param name="analyzer">analyzer used to split the query into terms; may be null</param>
 public QueryTermVector(System.String queryString, Analyzer analyzer)
 {
     if (analyzer == null)
         return;
     TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
     if (stream == null)
         return;
     System.Collections.ArrayList words = new System.Collections.ArrayList();
     try
     {
         Token scratch = new Token();
         Token current = stream.Next(scratch);
         while (current != null)
         {
             words.Add(current.Term());
             current = stream.Next(scratch);
         }
         ProcessTerms((System.String[]) words.ToArray(typeof(System.String)));
     }
     catch (System.IO.IOException)
     {
     }
 }
开发者ID:cqm0609,项目名称:lucene-file-finder,代码行数:23,代码来源:QueryTermVector.cs

示例5: GetTokenStream

		/// <summary>
		/// Convenience method: re-analyzes the stored contents of <paramref name="field"/>
		/// in document <paramref name="docId"/> and returns the resulting token stream.
		/// </summary>
		/// <param name="reader">index reader the document is loaded from</param>
		/// <param name="docId">id of the document to analyze</param>
		/// <param name="field">name of the stored field to analyze</param>
		/// <param name="analyzer">analyzer applied to the stored text</param>
		/// <exception cref="System.ArgumentException">if the field is not stored in the document</exception>
		public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
		{
			Document document = reader.Document(docId);
			System.String storedText = document.Get(field);
			if (storedText == null)
			{
				throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
			}
			return analyzer.TokenStream(field, new System.IO.StringReader(storedText));
		}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:11,代码来源:TokenSources.cs

示例6: FormSimilarQuery

        /// <summary> Simple similarity query generator.
        /// Takes every unique word and forms a boolean query where all words are optional.
        /// After you get this you'll use it to query your {@link IndexSearcher} for similar docs.
        /// The only caveat is the first hit returned <b>should be</b> your source document - you'll
        /// need to then ignore that.
        /// 
        /// <p>
        /// So, if you have a code fragment like this:
        /// <br>
        /// <code>
        /// Query q = formSimilaryQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
        /// </code>
        /// 
        /// <p>
        /// The query returned, in string form, will be <code>'(i use lucene to search fast searchers are good')</code>.
        /// 
        /// <p>
        /// The philosophy behind this method is "two documents are similar if they share lots of words".
        /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a
        /// higher similarity score if they share more uncommon words.
        /// 
        /// <P>
        /// This method is fail-safe in that if a long 'body' is passed in and
        /// {@link BooleanQuery#add BooleanQuery.add()} (used internally)
        /// throws
        /// {@link org.apache.lucene.search.BooleanQuery.TooManyClauses BooleanQuery.TooManyClauses}, the
        /// query as it is will be returned.
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <throws>  IOException this can't happen... </throws>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
        {
            TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
            TermAttribute termAtt = (TermAttribute)ts.AddAttribute(typeof(TermAttribute));

            BooleanQuery tmp = new BooleanQuery();
            System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
            while (ts.IncrementToken())
            {
                String word = termAtt.Term();
                // ignore opt stop words
                if (stop != null && stop.Contains(word))
                    continue;
                // ignore dups (redundant "== true" comparison removed)
                if (already.Contains(word))
                    continue;
                already.Add(word, word);
                // add to query
                TermQuery tq = new TermQuery(new Term(field, word));
                try
                {
                    tmp.Add(tq, BooleanClause.Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // fail-safe, just return what we have, not the end of the world
                    break;
                }
            }
            return tmp;
        }
开发者ID:VirtueMe,项目名称:ravendb,代码行数:77,代码来源:SimilarityQueries.cs

示例7: CreateFieldQuery

        /// <summary>
        /// Creates a query from the analysis chain.
        /// <p>
        /// Expert: this is more useful for subclasses such as queryparsers.
        /// If using this class directly, just use <seealso cref="#createBooleanQuery(String, String)"/>
        /// and <seealso cref="#createPhraseQuery(String, String)"/> </summary>
        /// <param name="analyzer"> analyzer used for this query </param>
        /// <param name="operator"> default boolean operator used for this query </param>
        /// <param name="field"> field to create queries against </param>
        /// <param name="queryText"> text to be passed to the analysis chain </param>
        /// <param name="quoted"> true if phrases should be generated when terms occur at more than one position </param>
        /// <param name="phraseSlop"> slop factor for phrase/multiphrase queries </param>
        protected internal Query CreateFieldQuery(Analyzer analyzer, BooleanClause.Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
        {
            Debug.Assert(@operator == BooleanClause.Occur.SHOULD || @operator == BooleanClause.Occur.MUST);
            // Use the analyzer to get all the tokens, and then build a TermQuery,
            // PhraseQuery, or nothing based on the term count
            CachingTokenFilter buffer = null;
            ITermToBytesRefAttribute termAtt = null;
            IPositionIncrementAttribute posIncrAtt = null;
            int numTokens = 0;
            int positionCount = 0;
            bool severalTokensAtSamePosition = false;
            bool hasMoreTokens = false;

            TokenStream source = null;
            try
            {
                source = analyzer.TokenStream(field, new StringReader(queryText));
                source.Reset();
                buffer = new CachingTokenFilter(source);
                buffer.Reset();

                if (buffer.HasAttribute<ITermToBytesRefAttribute>())
                {
                    termAtt = buffer.GetAttribute<ITermToBytesRefAttribute>();
                }
                if (buffer.HasAttribute<IPositionIncrementAttribute>())
                {
                    posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
                }

                if (termAtt != null)
                {
                    try
                    {
                        hasMoreTokens = buffer.IncrementToken();
                        while (hasMoreTokens)
                        {
                            numTokens++;
                            int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                            if (positionIncrement != 0)
                            {
                                positionCount += positionIncrement;
                            }
                            else
                            {
                                severalTokensAtSamePosition = true;
                            }
                            hasMoreTokens = buffer.IncrementToken();
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        // ignore
                    }
                }
            }
            catch (System.IO.IOException e)
            {
                throw new Exception("Error analyzing query text", e);
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(source);
            }

            // rewind the buffer stream
            buffer.Reset();

            BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;

            if (numTokens == 0)
            {
                return null;
            }
            else if (numTokens == 1)
            {
                try
                {
                    bool hasNext = buffer.IncrementToken();
                    Debug.Assert(hasNext == true);
                    termAtt.FillBytesRef();
                }
                catch (System.IO.IOException)
                {
                    // safe to ignore, because we know the number of tokens
                }
                return NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
            }
//.........这里部分代码省略.........
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:101,代码来源:QueryBuilder.cs

示例8: GetBestFragment

		/// <summary> Highlights chosen terms in a text, extracting the most relevant section.
		/// Convenience wrapper around {@link #GetBestFragment(TokenStream, String)}:
		/// the analyzer is applied to <code>text</code> first.
		/// </summary>
		/// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
		/// <param name="fieldName">Name of field used to influence analyzer's tokenization policy</param>
		/// <param name="text">text to highlight terms in</param>
		/// <returns> highlighted text fragment or null if no terms found</returns>
		public System.String GetBestFragment(Analyzer analyzer, System.String fieldName, System.String text)
		{
			System.IO.StringReader textReader = new System.IO.StringReader(text);
			TokenStream analyzedStream = analyzer.TokenStream(fieldName, textReader);
			return GetBestFragment(analyzedStream, text);
		}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:20,代码来源:Highlighter.cs

示例9: GetBestFragments

		/// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
		/// Convenience wrapper around {@link #getBestFragments(TokenStream, String, int)}:
		/// the analyzer is applied to <code>text</code> first.
		/// </summary>
		/// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
		/// <param name="fieldName">the name of the field being highlighted (used by analyzer)</param>
		/// <param name="text">text to highlight terms in</param>
		/// <param name="maxNumFragments">the maximum number of fragments</param>
		/// <returns> highlighted text fragments (between 0 and maxNumFragments number of fragments)</returns>
		public System.String[] GetBestFragments(Analyzer analyzer, System.String fieldName, System.String text, int maxNumFragments)
		{
			System.IO.StringReader textReader = new System.IO.StringReader(text);
			TokenStream analyzedStream = analyzer.TokenStream(fieldName, textReader);
			return GetBestFragments(analyzedStream, text, maxNumFragments);
		}
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:22,代码来源:Highlighter.cs

示例10: GetBestFragments

 /// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
 /// Convenience wrapper around {@link #getBestFragments(TokenStream, String, int)};
 /// analysis is performed against the hard-coded field name "field".
 /// </summary>
 /// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
 /// <param name="text">text to highlight terms in</param>
 /// <param name="maxNumFragments">the maximum number of fragments</param>
 /// <returns> highlighted text fragments (between 0 and maxNumFragments number of fragments)</returns>
 public String[] GetBestFragments(Analyzer analyzer, string text, int maxNumFragments)
 {
     StringReader textReader = new StringReader(text);
     TokenStream analyzedStream = analyzer.TokenStream("field", textReader);
     return GetBestFragments(analyzedStream, text, maxNumFragments);
 }
开发者ID:usmanghani,项目名称:Misc,代码行数:17,代码来源:Highlighter.cs

示例11: FormSimilarQuery

 /// <summary> Simple similarity query generator.
 /// Takes every unique word and forms a boolean query where all words are optional.
 /// After you get this you'll use it to query your {@link IndexSearcher} for similar docs.
 /// The only caveat is the first hit returned <b>should be</b> your source document - you'll
 /// need to then ignore that.
 /// 
 /// <p>
 /// So, if you have a code fragment like this:
 /// <br>
 /// <code>
 /// Query q = formSimilaryQuery( "I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
 /// </code>
 /// 
 /// <p>
 /// The query returned, in string form, will be <code>'(i use lucene to search fast searchers are good')</code>.
 /// 
 /// <p>
 /// The philosophy behind this method is "two documents are similar if they share lots of words".
 /// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a
 /// higher similarity score if they share more uncommon words.
 /// 
 /// <P>
 /// This method is fail-safe in that if a long 'body' is passed in and
 /// {@link BooleanQuery#add BooleanQuery.add()} (used internally)
 /// throws
 /// {@link org.apache.lucene.search.BooleanQuery.TooManyClauses BooleanQuery.TooManyClauses}, the
 /// query as it is will be returned.
 /// </summary>
 /// <param name="body">the body of the document you want to find similar documents to
 /// </param>
 /// <param name="a">the analyzer to use to parse the body
 /// </param>
 /// <param name="field">the field you want to search on, probably something like "contents" or "body"
 /// </param>
 /// <param name="stop">optional set of stop words to ignore
 /// </param>
 /// <returns> a query with all unique words in 'body'
 /// </returns>
 /// <throws>  IOException this can't happen... </throws>
 public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
 {
     TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
     Lucene.Net.Analysis.Token t;
     BooleanQuery tmp = new BooleanQuery();
     System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
     while ((t = ts.Next()) != null)
     {
         System.String word = t.TermText();
         // ignore opt stop words
         if (stop != null && stop.Contains(word))
             continue;
         // ignore dups (redundant "== true" comparison removed)
         if (already.Contains(word))
             continue;
         already.Add(word, word);
         // add to query
         TermQuery tq = new TermQuery(new Term(field, word));
         try
         {
             tmp.Add(tq, BooleanClause.Occur.SHOULD); //false, false);
         }
         catch (BooleanQuery.TooManyClauses)
         {
             // fail-safe, just return what we have, not the end of the world
             // (was "catch (... too)" with an unused variable — CS0168)
             break;
         }
     }
     return tmp;
 }
开发者ID:vikasraz,项目名称:indexsearchutils,代码行数:76,代码来源:SimilarityQueries.cs

示例12: GetTokenStream

        /// <summary>
        /// Returns the token stream that should be used to index this field's value,
        /// or null when the field type is not indexed.
        /// Resolution order: numeric value -> non-tokenized string -> pre-built token
        /// stream -> reader value -> string value; anything else throws.
        /// </summary>
        /// <param name="analyzer">analyzer used only for the tokenized reader/string cases</param>
        /// <exception cref="System.ArgumentException">if no usable value is present, or a
        /// non-tokenized field has no string value</exception>
        public TokenStream GetTokenStream(Analyzer analyzer)
        {
            // Non-indexed fields produce no tokens at all.
            if (!((FieldType)FieldType()).Indexed)
            {
                return null;
            }
            Number n = new Number();
            NumericType? numericType = ((FieldType)FieldType()).NumericTypeValue;
            if (numericType != null)
            {
                if (!(InternalTokenStream is NumericTokenStream))
                {
                    // lazy init the TokenStream as it is heavy to instantiate
                    // (attributes,...) if not needed (stored field loading)
                    InternalTokenStream = new NumericTokenStream(Type.NumericPrecisionStep);
                }
                NumericTokenStream nts = (NumericTokenStream)InternalTokenStream;
                // initialize value in TokenStream
                object val = FieldsData;
                // Dispatch on the declared numeric type; Convert.To* coerces the boxed value.
                switch (numericType)
                {
                    case NumericType.INT:
                        nts.SetIntValue(Convert.ToInt32(val));
                        break;

                    case NumericType.LONG:
                        nts.SetLongValue(Convert.ToInt64(val));
                        break;

                    case NumericType.FLOAT:
                        nts.SetFloatValue(Convert.ToSingle(val));
                        break;

                    case NumericType.DOUBLE:
                        nts.SetDoubleValue(Convert.ToDouble(val));
                        break;

                    default:
                        // NumericTypeValue was non-null but not a known member: internal invariant broken.
                        throw new Exception("Should never get here");
                }
                return InternalTokenStream;
            }

            // Non-tokenized fields emit the whole string value as a single token.
            if (!((FieldType)FieldType()).Tokenized)
            {
                if (StringValue == null)
                {
                    throw new System.ArgumentException("Non-Tokenized Fields must have a String value");
                }
                if (!(InternalTokenStream is StringTokenStream))
                {
                    // lazy init the TokenStream as it is heavy to instantiate
                    // (attributes,...) if not needed (stored field loading)
                    InternalTokenStream = new StringTokenStream();
                }
                ((StringTokenStream)InternalTokenStream).Value = StringValue;
                return InternalTokenStream;
            }

            // Tokenized path: prefer an explicitly supplied stream, then a reader,
            // then analyze the string value.
            if (TokenStream_Renamed != null)
            {
                return TokenStream_Renamed;
            }
            else if (ReaderValue != null)
            {
                return analyzer.TokenStream(Name(), ReaderValue);
            }
            else if (StringValue != null)
            {
                TextReader sr = new StringReader(StringValue);
                return analyzer.TokenStream(Name(), sr);
            }

            throw new System.ArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this);
        }
开发者ID:joyanta,项目名称:lucene.net,代码行数:75,代码来源:Field.cs

示例13: FormSimilarQuery

        /// <summary> Simple similarity query generator.
        /// Takes every unique word in <paramref name="body"/> and builds a boolean query
        /// in which every word is optional (SHOULD). Use the result against your
        /// <see cref="IndexSearcher"/> to find similar documents; the first hit should be
        /// the source document itself and must be ignored by the caller.
        /// 
        /// <p/>
        /// For the input <c>"I use Lucene to search fast. Fast searchers are good"</c>
        /// (StandardAnalyzer), the query in string form is
        /// <c>'(i use lucene to search fast searchers are good')</c>.
        /// 
        /// <p/>
        /// Fail-safe: if <see cref="BooleanQuery.Add"/> throws
        /// <see cref="BooleanQuery.TooManyClauses"/>, the query built so far is returned.
        /// </summary>
        /// <param name="body">the body of the document you want to find similar documents to
        /// </param>
        /// <param name="a">the analyzer to use to parse the body
        /// </param>
        /// <param name="field">the field you want to search on, probably something like "contents" or "body"
        /// </param>
        /// <param name="stop">optional set of stop words to ignore
        /// </param>
        /// <returns> a query with all unique words in 'body'
        /// </returns>
        /// <throws>  IOException this can't happen... </throws>
        public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
        {
            TokenStream tokens = a.TokenStream(field, new System.IO.StringReader(body));
            ITermAttribute termAttr = tokens.AddAttribute<ITermAttribute>();

            BooleanQuery query = new BooleanQuery();
            ISet<string> seen = Lucene.Net.Support.Compatibility.SetFactory.GetSet<string>(); // dedup set
            while (tokens.IncrementToken())
            {
                String candidate = termAttr.Term;
                // skip stop words and anything already added
                bool isStopWord = stop != null && stop.Contains(candidate);
                if (isStopWord || seen.Contains(candidate))
                    continue;
                seen.Add(candidate);
                TermQuery clause = new TermQuery(new Term(field, candidate));
                try
                {
                    query.Add(clause, Occur.SHOULD);
                }
                catch (BooleanQuery.TooManyClauses)
                {
                    // fail-safe: return what we have so far
                    break;
                }
            }
            return query;
        }
开发者ID:synhershko,项目名称:lucene.net,代码行数:71,代码来源:SimilarityQueries.cs


注:本文中的Lucene.Net.Analysis.Analyzer.TokenStream方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。