This article collects and organizes typical usage examples of the C# method Lucene.Net.Analysis.Analyzer.TokenStream. If you have been wondering what exactly the C# Lucene.Net.Analysis.Analyzer.TokenStream method does, how to call it, or what real-world code that uses it looks like, the curated examples below should help. You can also explore further usage of the containing class, Lucene.Net.Analysis.Analyzer.
The following presents 13 code examples of the Lucene.Net.Analysis.Analyzer.TokenStream method, sorted by popularity by default.
Example 1: QueryTermVector
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer != null)
    {
        TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
        if (stream != null)
        {
            System.Collections.ArrayList terms = new System.Collections.ArrayList();
            try
            {
                stream.Reset();
                TermAttribute termAtt = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));
                bool hasMoreTokens = stream.IncrementToken();
                while (hasMoreTokens)
                {
                    terms.Add(termAtt.Term());
                    hasMoreTokens = stream.IncrementToken();
                }
                ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
            }
            catch (System.IO.IOException)
            {
                // ignore: keep whatever terms were collected before the failure
            }
        }
    }
}
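The pattern above - add an attribute, Reset(), then an IncrementToken() loop - is the standard way to consume the stream that Analyzer.TokenStream returns. Below is a minimal standalone sketch of the same loop, assuming the 2.9-era attribute API used in this example; the field name, input text, and analyzer choice are illustrative, not from the original code.

// A hedged standalone sketch of the consumption pattern above; names are illustrative.
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
TokenStream stream = analyzer.TokenStream("contents", new System.IO.StringReader("Hello Lucene World"));
TermAttribute termAtt = (TermAttribute) stream.AddAttribute(typeof(TermAttribute));
stream.Reset();
while (stream.IncrementToken())
{
    System.Console.WriteLine(termAtt.Term()); // StandardAnalyzer lowercases: hello, lucene, world
}
stream.Close();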
Example 2: QueryTermVector
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer != null)
    {
        TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
        if (stream != null)
        {
            IList<string> terms = new List<string>();
            try
            {
                stream.Reset();
                ITermAttribute termAtt = stream.AddAttribute<ITermAttribute>();
                bool hasMoreTokens = stream.IncrementToken();
                while (hasMoreTokens)
                {
                    terms.Add(termAtt.Term);
                    hasMoreTokens = stream.IncrementToken();
                }
                ProcessTerms(terms.ToArray());
            }
            catch (System.IO.IOException)
            {
                // ignore: keep whatever terms were collected before the failure
            }
        }
    }
}
Example 3: QueryTermVector
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer != null)
    {
        TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
        if (stream != null)
        {
            Token next = null;
            System.Collections.ArrayList terms = new System.Collections.ArrayList();
            try
            {
                // older token-based API: Next() returns a new Token per call
                while ((next = stream.Next()) != null)
                {
                    terms.Add(next.TermText());
                }
                ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
            }
            catch (System.IO.IOException)
            {
                // ignore: keep whatever terms were collected before the failure
            }
        }
    }
}
Example 4: QueryTermVector
public QueryTermVector(System.String queryString, Analyzer analyzer)
{
    if (analyzer != null)
    {
        TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
        if (stream != null)
        {
            System.Collections.ArrayList terms = new System.Collections.ArrayList();
            try
            {
                // reusable-token API: Next(Token) may reuse the passed-in instance
                Token reusableToken = new Token();
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    terms.Add(nextToken.Term());
                }
                ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
            }
            catch (System.IO.IOException)
            {
                // ignore: keep whatever terms were collected before the failure
            }
        }
    }
}
Example 5: GetTokenStream
// Convenience method: loads the stored field text and re-analyzes it.
public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
{
    Document doc = reader.Document(docId);
    System.String contents = doc.Get(field);
    if (contents == null)
    {
        throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
    }
    return analyzer.TokenStream(field, new System.IO.StringReader(contents));
}
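A hedged usage sketch of this helper, assuming an open IndexReader named reader whose documents store a "contents" field; the analyzer and document id are illustrative assumptions.

// Hypothetical usage of the convenience method above; reader and field name are assumptions.
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
TokenStream ts = GetTokenStream(reader, 0, "contents", analyzer);
// ts can now be consumed with the Reset()/IncrementToken() loop shown in Example 1.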
Example 6: FormSimilarQuery
/// <summary> Simple similarity query generator.
/// Takes every unique word and forms a boolean query in which all words are optional.
/// Once you have it, you use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is that the first hit returned <b>should be</b> your source document - you'll
/// need to ignore that.
///
/// <p/>
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good)'</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
/// similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add"/> (used internally) throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the query as built so far will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to</param>
/// <param name="a">the analyzer to use to parse the body</param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"</param>
/// <param name="stop">optional set of stop words to ignore</param>
/// <returns>a query containing all unique words in 'body'</returns>
/// <throws>IOException this can't happen...</throws>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    TermAttribute termAtt = (TermAttribute) ts.AddAttribute(typeof(TermAttribute));
    BooleanQuery tmp = new BooleanQuery();
    System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
    while (ts.IncrementToken())
    {
        String word = termAtt.Term();
        // ignore optional stop words
        if (stop != null && stop.Contains(word))
            continue;
        // ignore dups
        if (already.Contains(word))
            continue;
        already.Add(word, word);
        // add to query
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
            tmp.Add(tq, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // fail-safe: just return what we have; not the end of the world
            break;
        }
    }
    return tmp;
}
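A hedged end-to-end sketch of FormSimilarQuery in use, assuming an existing IndexSearcher named searcher and 2.9-era APIs; all names here are illustrative assumptions. The first hit is skipped because, as the summary notes, it is usually the source document itself.

// Hypothetical usage of FormSimilarQuery; searcher, field name, and analyzer are assumptions.
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good",
                           analyzer, "contents", null);
TopDocs hits = searcher.Search(q, 10);
for (int i = 1; i < hits.ScoreDocs.Length; i++) // skip hit 0: likely the source doc
{
    Document similarDoc = searcher.Doc(hits.ScoreDocs[i].Doc);
    System.Console.WriteLine(similarDoc.Get("contents"));
}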
Example 7: CreateFieldQuery
/// <summary>
/// Creates a query from the analysis chain.
/// <p/>
/// Expert: this is more useful for subclasses such as query parsers.
/// If using this class directly, just use <seealso cref="CreateBooleanQuery(string, string)"/>
/// and <seealso cref="CreatePhraseQuery(string, string)"/>. </summary>
/// <param name="analyzer"> analyzer used for this query </param>
/// <param name="operator"> default boolean operator used for this query </param>
/// <param name="field"> field to create queries against </param>
/// <param name="queryText"> text to be passed to the analysis chain </param>
/// <param name="quoted"> true if phrases should be generated when terms occur at more than one position </param>
/// <param name="phraseSlop"> slop factor for phrase/multiphrase queries </param>
protected internal Query CreateFieldQuery(Analyzer analyzer, BooleanClause.Occur @operator, string field, string queryText, bool quoted, int phraseSlop)
{
    Debug.Assert(@operator == BooleanClause.Occur.SHOULD || @operator == BooleanClause.Occur.MUST);
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    CachingTokenFilter buffer = null;
    ITermToBytesRefAttribute termAtt = null;
    IPositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    int positionCount = 0;
    bool severalTokensAtSamePosition = false;
    bool hasMoreTokens = false;
    TokenStream source = null;
    try
    {
        source = analyzer.TokenStream(field, new StringReader(queryText));
        source.Reset();
        buffer = new CachingTokenFilter(source);
        buffer.Reset();
        if (buffer.HasAttribute<ITermToBytesRefAttribute>())
        {
            termAtt = buffer.GetAttribute<ITermToBytesRefAttribute>();
        }
        if (buffer.HasAttribute<IPositionIncrementAttribute>())
        {
            posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
        }
        if (termAtt != null)
        {
            try
            {
                hasMoreTokens = buffer.IncrementToken();
                while (hasMoreTokens)
                {
                    numTokens++;
                    int positionIncrement = (posIncrAtt != null) ? posIncrAtt.PositionIncrement : 1;
                    if (positionIncrement != 0)
                    {
                        positionCount += positionIncrement;
                    }
                    else
                    {
                        severalTokensAtSamePosition = true;
                    }
                    hasMoreTokens = buffer.IncrementToken();
                }
            }
            catch (System.IO.IOException)
            {
                // ignore
            }
        }
    }
    catch (System.IO.IOException e)
    {
        throw new Exception("Error analyzing query text", e);
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(source);
    }
    // rewind the buffer stream
    buffer.Reset();
    BytesRef bytes = termAtt == null ? null : termAtt.BytesRef;
    if (numTokens == 0)
    {
        return null;
    }
    else if (numTokens == 1)
    {
        try
        {
            bool hasNext = buffer.IncrementToken();
            Debug.Assert(hasNext);
            termAtt.FillBytesRef();
        }
        catch (System.IO.IOException)
        {
            // safe to ignore, because we know the number of tokens
        }
        return NewTermQuery(new Term(field, BytesRef.DeepCopyOf(bytes)));
    }
//......... the rest of this example is omitted .........
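CreateFieldQuery is protected; in the Lucene.Net 4.x port it is normally reached through the public QueryBuilder entry points. A hedged sketch under that assumption (the field names and query text are illustrative):

// Hypothetical callers that delegate to CreateFieldQuery internally; a sketch only.
QueryBuilder builder = new QueryBuilder(analyzer);
Query or = builder.CreateBooleanQuery("body", "just a test");       // SHOULD-joined terms
Query phrase = builder.CreatePhraseQuery("body", "jakarta apache"); // phrase if multi-position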
Example 8: GetBestFragment
/// <summary> Highlights chosen terms in a text, extracting the most relevant section.
/// This is a convenience method that calls
/// <see cref="GetBestFragment(TokenStream, String)"/>
/// </summary>
/// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
/// <param name="fieldName">name of the field used to influence the analyzer's tokenization policy</param>
/// <param name="text">text to highlight terms in</param>
/// <returns>highlighted text fragment, or null if no terms were found</returns>
public System.String GetBestFragment(Analyzer analyzer, System.String fieldName, System.String text)
{
    TokenStream tokenStream = analyzer.TokenStream(fieldName, new System.IO.StringReader(text));
    return GetBestFragment(tokenStream, text);
}
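A hedged usage sketch for this highlighter overload, assuming the contrib Highlighter API (a QueryScorer wrapping a previously parsed query named query); the analyzer, field name, and text are illustrative assumptions.

// Hypothetical highlighter usage; query, analyzer, and text are assumptions.
Highlighter highlighter = new Highlighter(new QueryScorer(query));
string text = "The quick brown fox jumps over the lazy dog";
string fragment = highlighter.GetBestFragment(analyzer, "contents", text);
if (fragment != null)
{
    System.Console.WriteLine(fragment); // matched terms wrapped in <B>...</B> by default
}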
Example 9: GetBestFragments
/// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
/// This is a convenience method that calls
/// <see cref="GetBestFragments(TokenStream, String, int)"/>
/// </summary>
/// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
/// <param name="fieldName">the name of the field being highlighted (used by the analyzer)</param>
/// <param name="text">text to highlight terms in</param>
/// <param name="maxNumFragments">the maximum number of fragments</param>
/// <returns>highlighted text fragments (between 0 and maxNumFragments fragments)</returns>
public System.String[] GetBestFragments(Analyzer analyzer, System.String fieldName, System.String text, int maxNumFragments)
{
    TokenStream tokenStream = analyzer.TokenStream(fieldName, new System.IO.StringReader(text));
    return GetBestFragments(tokenStream, text, maxNumFragments);
}
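The multi-fragment variant works the same way; a hedged sketch continuing the assumptions from the Example 8 sketch (highlighter, analyzer, and text):

// Hypothetical usage; highlighter, analyzer, and text carry over from the Example 8 sketch.
string[] fragments = highlighter.GetBestFragments(analyzer, "contents", text, 3);
foreach (string frag in fragments)
{
    System.Console.WriteLine(frag);
}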
Example 10: GetBestFragments
/// <summary> Highlights chosen terms in a text, extracting the most relevant sections.
/// This is a convenience method that calls
/// <see cref="GetBestFragments(TokenStream, String, int)"/>
/// </summary>
/// <param name="analyzer">the analyzer that will be used to split <code>text</code> into chunks</param>
/// <param name="text">text to highlight terms in</param>
/// <param name="maxNumFragments">the maximum number of fragments</param>
/// <returns>highlighted text fragments (between 0 and maxNumFragments fragments)</returns>
public String[] GetBestFragments(Analyzer analyzer, string text, int maxNumFragments)
{
    TokenStream tokenStream = analyzer.TokenStream("field", new StringReader(text));
    return GetBestFragments(tokenStream, text, maxNumFragments);
}
Example 11: FormSimilarQuery
/// <summary> Simple similarity query generator.
/// Takes every unique word and forms a boolean query in which all words are optional.
/// Once you have it, you use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is that the first hit returned <b>should be</b> your source document - you'll
/// need to ignore that.
///
/// <p/>
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good)'</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
/// similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add"/> (used internally) throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the query as built so far will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to</param>
/// <param name="a">the analyzer to use to parse the body</param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"</param>
/// <param name="stop">optional set of stop words to ignore</param>
/// <returns>a query containing all unique words in 'body'</returns>
/// <throws>IOException this can't happen...</throws>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, System.Collections.Hashtable stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    Lucene.Net.Analysis.Token t;
    BooleanQuery tmp = new BooleanQuery();
    System.Collections.Hashtable already = new System.Collections.Hashtable(); // ignore dups
    while ((t = ts.Next()) != null)
    {
        System.String word = t.TermText();
        // ignore optional stop words
        if (stop != null && stop.Contains(word))
            continue;
        // ignore dups
        if (already.Contains(word))
            continue;
        already.Add(word, word);
        // add to query
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
            tmp.Add(tq, BooleanClause.Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // fail-safe: just return what we have; not the end of the world
            break;
        }
    }
    return tmp;
}
Example 12: GetTokenStream
public TokenStream GetTokenStream(Analyzer analyzer)
{
    if (!((FieldType)FieldType()).Indexed)
    {
        return null;
    }
    NumericType? numericType = ((FieldType)FieldType()).NumericTypeValue;
    if (numericType != null)
    {
        if (!(InternalTokenStream is NumericTokenStream))
        {
            // lazy init the TokenStream as it is heavy to instantiate
            // (attributes, ...) if not needed (stored field loading)
            InternalTokenStream = new NumericTokenStream(Type.NumericPrecisionStep);
        }
        NumericTokenStream nts = (NumericTokenStream)InternalTokenStream;
        // initialize value in TokenStream
        object val = FieldsData;
        switch (numericType)
        {
            case NumericType.INT:
                nts.SetIntValue(Convert.ToInt32(val));
                break;
            case NumericType.LONG:
                nts.SetLongValue(Convert.ToInt64(val));
                break;
            case NumericType.FLOAT:
                nts.SetFloatValue(Convert.ToSingle(val));
                break;
            case NumericType.DOUBLE:
                nts.SetDoubleValue(Convert.ToDouble(val));
                break;
            default:
                throw new Exception("Should never get here");
        }
        return InternalTokenStream;
    }
    if (!((FieldType)FieldType()).Tokenized)
    {
        if (StringValue == null)
        {
            throw new System.ArgumentException("Non-Tokenized Fields must have a String value");
        }
        if (!(InternalTokenStream is StringTokenStream))
        {
            // lazy init the TokenStream as it is heavy to instantiate
            // (attributes, ...) if not needed (stored field loading)
            InternalTokenStream = new StringTokenStream();
        }
        ((StringTokenStream)InternalTokenStream).Value = StringValue;
        return InternalTokenStream;
    }
    if (TokenStream_Renamed != null)
    {
        return TokenStream_Renamed;
    }
    else if (ReaderValue != null)
    {
        return analyzer.TokenStream(Name(), ReaderValue);
    }
    else if (StringValue != null)
    {
        TextReader sr = new StringReader(StringValue);
        return analyzer.TokenStream(Name(), sr);
    }
    throw new System.ArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this);
}
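Note that this method only calls Analyzer.TokenStream on the tokenized-text path; numeric and non-tokenized fields reuse cached NumericTokenStream/StringTokenStream instances instead. A hedged sketch of the tokenized path, assuming the same port's FieldType and Field APIs (the field name, value, and analyzer are illustrative):

// Hypothetical illustration of the tokenized-text branch above; a sketch only.
FieldType ft = new FieldType { Indexed = true, Tokenized = true };
Field f = new Field("body", "some text to analyze", ft);
TokenStream ts = f.GetTokenStream(analyzer); // delegates to analyzer.TokenStream(...)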
Example 13: FormSimilarQuery
/// <summary> Simple similarity query generator.
/// Takes every unique word and forms a boolean query in which all words are optional.
/// Once you have it, you use it to query your <see cref="IndexSearcher"/> for similar docs.
/// The only caveat is that the first hit returned <b>should be</b> your source document - you'll
/// need to ignore that.
///
/// <p/>
/// So, if you have a code fragment like this:
/// <br/>
/// <code>
/// Query q = FormSimilarQuery("I use Lucene to search fast. Fast searchers are good", new StandardAnalyzer(), "contents", null);
/// </code>
///
/// <p/>
/// The query returned, in string form, will be <c>'(i use lucene to search fast searchers are good)'</c>.
///
/// <p/>
/// The philosophy behind this method is "two documents are similar if they share lots of words".
/// Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher
/// similarity score if they share more uncommon words.
///
/// <p/>
/// This method is fail-safe in that if a long 'body' is passed in and
/// <see cref="BooleanQuery.Add"/> (used internally) throws
/// <see cref="BooleanQuery.TooManyClauses"/>, the query as built so far will be returned.
/// </summary>
/// <param name="body">the body of the document you want to find similar documents to</param>
/// <param name="a">the analyzer to use to parse the body</param>
/// <param name="field">the field you want to search on, probably something like "contents" or "body"</param>
/// <param name="stop">optional set of stop words to ignore</param>
/// <returns>a query containing all unique words in 'body'</returns>
/// <throws>IOException this can't happen...</throws>
public static Query FormSimilarQuery(System.String body, Analyzer a, System.String field, ISet<string> stop)
{
    TokenStream ts = a.TokenStream(field, new System.IO.StringReader(body));
    ITermAttribute termAtt = ts.AddAttribute<ITermAttribute>();
    BooleanQuery tmp = new BooleanQuery();
    ISet<string> already = Lucene.Net.Support.Compatibility.SetFactory.GetSet<string>(); // ignore dups
    while (ts.IncrementToken())
    {
        String word = termAtt.Term;
        // ignore optional stop words
        if (stop != null && stop.Contains(word))
            continue;
        // ignore dups
        if (already.Contains(word))
            continue;
        already.Add(word);
        // add to query
        TermQuery tq = new TermQuery(new Term(field, word));
        try
        {
            tmp.Add(tq, Occur.SHOULD);
        }
        catch (BooleanQuery.TooManyClauses)
        {
            // fail-safe: just return what we have; not the end of the world
            break;
        }
    }
    return tmp;
}