本文整理汇总了C#中Lucene.Net.Analysis.Standard.StandardTokenizer类的典型用法代码示例。如果您正苦于以下问题:C# StandardTokenizer类的具体用法?C# StandardTokenizer怎么用?C# StandardTokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
StandardTokenizer类属于Lucene.Net.Analysis.Standard命名空间,在下文中一共展示了StandardTokenizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Pipeline: tokenize, build 2..8-token shingles joined by spaces,
    // lowercase, then drop English stop words.
    TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);
    chain = new ShingleMatrixFilter(chain, 2, 8, ' ');
    chain = new LowerCaseFilter(chain);
    return new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
示例2: CreateComponents
/// <summary>
/// Builds the analysis chain for a Snowball language: a
/// <seealso cref="StandardTokenizer"/> followed by a StandardFilter,
/// a lowercase filter (Turkish-specific when required), an optional
/// <seealso cref="StopFilter"/>, and finally a <seealso cref="SnowballFilter"/>.
/// </summary>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream sink = new StandardFilter(matchVersion, source);

    bool post31 = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

    // English-family stemmers expect the possessive 's to be stripped first.
    if (post31 && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
    {
        sink = new EnglishPossessiveFilter(sink);
    }

    // Turkish needs its own lowercasing (dotted/dotless 'i' rules) for the stemmer.
    if (post31 && name.Equals("Turkish"))
    {
        sink = new TurkishLowerCaseFilter(sink);
    }
    else
    {
        sink = new LowerCaseFilter(matchVersion, sink);
    }

    if (stopSet != null)
    {
        sink = new StopFilter(matchVersion, sink, stopSet);
    }

    sink = new SnowballFilter(sink, name);
    return new TokenStreamComponents(source, sink);
}
示例3: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // StandardTokenizer works well for most European-language text:
    // splits on punctuation (dropping it), splits on hyphens unless the
    // token contains a number, and keeps emails/hostnames as single tokens.
    var tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);

    // ShingleMatrixFilter turns the token stream into word shingles, e.g.
    // "2010 Audi RS5 Quattro Coupe" => "2010 Audi", "Audi RS5", "RS5 Quattro", ...
    // Arguments: source stream, min tokens per shingle, max tokens per
    // shingle, separator character between shingle parts.
    var shingles = new ShingleMatrixFilter(tokenizer, 2, 8, ' ');

    // Lowercase everything, then remove English stop words.
    var lowercased = new LowerCaseFilter(shingles);
    return new StopFilter(true, lowercased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
示例4: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Tokenize, normalize via StandardFilter, then drop the configured stop words.
    TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);
    chain = new StandardFilter(chain);
    return new StopFilter(true, chain, _stopWords, true);
}
示例5: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Russian pipeline: standard tokenization and normalization,
    // lowercasing, then Snowball-based Russian stemming.
    return new RuSnowballFilter(
        new LowerCaseFilter(
            new StandardFilter(
                new StandardTokenizer(reader))));
}
示例6: TokenStream
/// <summary>Builds a <see cref="StandardTokenizer"/> chain: a
/// StandardFilter, a LowerCaseFilter, and a StopFilter using <c>stopSet</c>.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    return new StopFilter(
        new LowerCaseFilter(
            new StandardFilter(
                new StandardTokenizer(reader))),
        stopSet);
}
示例7: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Cap individual tokens at 255 characters before filtering.
    var source = new StandardTokenizer(Version.LUCENE_29, reader) { MaxTokenLength = 255 };
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(false, chain, StandardAnalyzer.STOP_WORDS_SET);
    // Emit 2- to 6-character n-grams of the surviving tokens.
    return new NGramTokenFilter(chain, 2, 6);
}
示例8: TokenStream
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Croatian pipeline: tokenize, normalize, lowercase, then stem.
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream chain = new StandardFilter(tokenizer);
    chain = new LowerCaseFilter(chain);
    // Stop-word filtering is intentionally disabled here:
    //chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), chain, stopSet);
    return new CroatianStemFilter(chain, stemmer);
}
示例9: TokenStream
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    // Thai pipeline: standard tokenization/normalization, Thai word
    // segmentation, then English stop-word removal.
    TokenStream chain = new StandardFilter(new StandardTokenizer(matchVersion, reader));
    chain = new ThaiWordFilter(chain);
    return new StopFilter(
        StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
        chain,
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
示例10: TokenStream
/// <summary>Builds a <see cref="StandardTokenizer"/> chain: a
/// StandardFilter, a LowerCaseFilter, a StopFilter over <c>stopTable</c>,
/// and a SpanishStemFilter.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    return new SpanishStemFilter(
        new StopFilter(true,
            new LowerCaseFilter(
                new StandardFilter(
                    new StandardTokenizer(Version.LUCENE_24, reader))),
            stopTable));
}
示例11: TokenStream
public override TokenStream TokenStream(string fieldname, TextReader reader)
{
    // Persian pipeline: tokenize, lowercase, apply Persian character
    // normalization, remove stop words, then stem.
    TokenStream chain = new StandardTokenizer(_version, reader);
    chain = new LowerCaseFilter(chain);
    chain = new PersianNormalizationFilter(chain);
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(_version), chain, _stoptable);
    return new PersianStemFilter(chain);
}
示例12: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Bulgarian pipeline: tokenize, normalize, lowercase, drop stop
    // words (honoring the configured position-increment setting), stem.
    TokenStream chain = new StandardFilter(new StandardTokenizer(matchVersion, reader));
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(this.enableStopPositionIncrements, chain, stoptable);
    return new BulgarianStemFilter(chain);
}
示例13: TokenStream
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Cap tokens at 255 characters, normalize and lowercase, then expand
    // each token into 2- to 255-character n-grams.
    var source = new StandardTokenizer(Version.LUCENE_30, reader) { MaxTokenLength = 255 };
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    return new NGramTokenFilter(chain, 2, 255);
}
示例14: TokenStream
/// <summary>Builds a <see cref="StandardTokenizer"/> chain: a
/// StandardFilter, a <see cref="LowerCaseFilter"/>, an optional
/// <see cref="StopFilter"/> (only when a stop set is configured), and a
/// SnowballFilter for the configured language.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream chain = new StandardFilter(new StandardTokenizer(matchVersion, reader));
    chain = new LowerCaseFilter(chain);
    if (stopSet != null)
    {
        chain = new StopFilter(
            StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            chain, stopSet);
    }
    return new SnowballFilter(chain, name);
}
示例15: TestElision_
public virtual void TestElision_()
{
    // French elision: the configured articles "l" and "M" are stripped
    // when they precede an apostrophe; other apostrophes are left alone.
    const string input = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, AsSet("l", "M"), false);
    IList<string> terms = Filter(new ElisionFilter(source, articles));

    assertEquals("embrouille", terms[4]);
    assertEquals("O'brian", terms[6]); // "O" is not a configured article, so kept intact
    assertEquals("enfin", terms[7]);
}