当前位置: 首页>>代码示例>>C#>>正文


C# TokenStream类代码示例

本文整理汇总了C#中TokenStream的典型用法代码示例。如果您正苦于以下问题:C# TokenStream类的具体用法?C# TokenStream怎么用?C# TokenStream使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


下文一共展示了TokenStream类的15个代码示例,这些示例选自lucenenet、EtcScript、TorqueDev、lax等多个开源项目(各项目中的TokenStream并不是同一个类,所属命名空间请以对应项目源码为准),默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: TypeTokenFilter

 /// <summary>
 /// Creates a <see cref="TypeTokenFilter"/> on top of <paramref name="input"/>.
 /// </summary>
 /// <param name="version">Forwarded unchanged to the base constructor.</param>
 /// <param name="enablePositionIncrements">Forwarded unchanged to the base constructor.</param>
 /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
 /// <param name="stopTypes">Set of type names kept by the filter; the reference is stored, not copied.</param>
 /// <param name="useWhiteList">Flag kept by the filter; its interpretation happens outside this constructor.</param>
 public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
     : base(version, enablePositionIncrements, input)
 {
     // Plain field stores first, then the attribute lookup; the three steps are independent.
     this.useWhiteList = useWhiteList;
     this.stopTypes = stopTypes;
     this.typeAttribute = AddAttribute<ITypeAttribute>();
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:7,代码来源:TypeTokenFilter.cs

示例2: IndonesianStemFilter

 /// <summary>
 /// Initializes a new <see cref="IndonesianStemFilter"/>.
 /// </summary>
 /// <remarks>
 /// When <paramref name="stemDerivational"/> is <c>false</c>, only inflectional
 /// suffixes (particles and possessive pronouns) are stemmed.
 /// </remarks>
 /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
 /// <param name="stemDerivational">Whether derivational stemming is enabled; stored on the filter.</param>
 public IndonesianStemFilter(TokenStream input, bool stemDerivational)
     : base(input)
 {
     // Attributes are requested in the same order as before; the flag store is independent of them.
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAtt = AddAttribute<IKeywordAttribute>();
     this.stemDerivational = stemDerivational;
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:14,代码来源:IndonesianStemFilter.cs

示例3: FilteringTokenFilter

 /// <summary>
 /// Initializes a new <see cref="FilteringTokenFilter"/> with position increments switched on.
 /// </summary>
 /// <param name="version">The Lucene match version; stored on the filter.</param>
 /// <param name="in">The <see cref="TokenStream"/> to consume; forwarded to the base constructor.</param>
 public FilteringTokenFilter(LuceneVersion version, TokenStream @in)
     : base(@in)
 {
     this.version = version;
     enablePositionIncrements = true;
     posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:FilteringTokenFilter.cs

示例4: CompoundWordTokenFilterBase

        /// <summary>
        /// Initializes the state shared by compound-word token filters: the attribute
        /// references, the pending-token list and the size limits.
        /// </summary>
        /// <param name="matchVersion">Lucene match version; stored on the filter.</param>
        /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
        /// <param name="dictionary">Word set stored on the filter; it is not validated here.</param>
        /// <param name="minWordSize">Minimum word size; must not be negative.</param>
        /// <param name="minSubwordSize">Minimum subword size; must not be negative.</param>
        /// <param name="maxSubwordSize">Maximum subword size; must not be negative.</param>
        /// <param name="onlyLongestMatch">Flag stored on the filter.</param>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="minWordSize"/>, <paramref name="minSubwordSize"/> or
        /// <paramref name="maxSubwordSize"/> is negative. This type derives from
        /// <see cref="System.ArgumentException"/>, so existing handlers keep working.
        /// </exception>
        protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
            : base(input)
        {
            // Reject bad sizes before AddAttribute is called, so a failed construction
            // has not already registered attributes.
            if (minWordSize < 0)
            {
                throw new System.ArgumentOutOfRangeException(nameof(minWordSize), "minWordSize cannot be negative");
            }
            if (minSubwordSize < 0)
            {
                throw new System.ArgumentOutOfRangeException(nameof(minSubwordSize), "minSubwordSize cannot be negative");
            }
            if (maxSubwordSize < 0)
            {
                throw new System.ArgumentOutOfRangeException(nameof(maxSubwordSize), "maxSubwordSize cannot be negative");
            }

            // Direct cast instead of "as": an unexpected attribute implementation now fails
            // here with InvalidCastException rather than leaving termAtt null for later use.
            termAtt = (CharTermAttribute)AddAttribute<ICharTermAttribute>();
            offsetAtt = AddAttribute<IOffsetAttribute>();
            posIncAtt = AddAttribute<IPositionIncrementAttribute>();

            this.matchVersion = matchVersion;
            this.tokens = new LinkedList<CompoundToken>();
            this.minWordSize = minWordSize;
            this.minSubwordSize = minSubwordSize;
            this.maxSubwordSize = maxSubwordSize;
            this.onlyLongestMatch = onlyLongestMatch;
            this.dictionary = dictionary;
        }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:27,代码来源:CompoundWordTokenFilterBase.cs

示例5: AddScriptMacro

 /// <summary>
 /// Parses <paramref name="Script"/> as a macro declaration, tags it with the current
 /// context's ID and appends it to the context's pending-emission list.
 /// </summary>
 /// <param name="Script">Source text of the macro declaration.</param>
 public void AddScriptMacro(String Script)
 {
     var macro = Parse.ParseMacroDeclaration(
         new TokenStream(new StringIterator(Script), Context),
         Context);

     macro.OwnerContextID = Context.ID;
     Context.PendingEmission.Add(macro);
 }
开发者ID:Blecki,项目名称:EtcScript,代码行数:7,代码来源:Environment.cs

示例6: NGramTokenFilter

	  /// <summary>
	  /// Creates an NGramTokenFilter that produces n-grams between the given minimum and maximum sizes.
	  /// The input is first wrapped in a <see cref="CodepointCountFilter"/> with a lower bound of
	  /// <paramref name="minGram"/> and an upper bound of <c>int.MaxValue</c>.
	  /// </summary>
	  /// <param name="version"> Lucene version to enable correct position increments.
	  ///                See <a href="#version">above</a> for details. </param>
	  /// <param name="input"> <see cref="TokenStream"/> holding the input to be tokenized </param>
	  /// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
	  /// <param name="maxGram"> the largest n-gram to generate; must not be smaller than <paramref name="minGram"/> </param>
	  /// <exception cref="System.ArgumentException"> <paramref name="minGram"/> is below 1 or above <paramref name="maxGram"/> </exception>
	  public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram)
		  : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
	  {
		// NOTE(review): onOrAfter / getInstance / addAttribute(typeof(...)) still carry their
		// Java-style spelling; confirm they resolve against the current C# API.
		bool atLeastLucene44 = version.onOrAfter(Version.LUCENE_44);

		this.version = version;
		if (atLeastLucene44)
		{
		  this.charUtils = CharacterUtils.getInstance(version);
		}
		else
		{
		  this.charUtils = CharacterUtils.Java4Instance;
		}

		if (minGram < 1)
		{
		  throw new System.ArgumentException("minGram must be greater than zero");
		}
		if (maxGram < minGram)
		{
		  throw new System.ArgumentException("minGram must not be greater than maxGram");
		}
		this.minGram = minGram;
		this.maxGram = maxGram;

		// Before 4.4 the position attributes are helper objects instead of registered attributes.
		if (!atLeastLucene44)
		{
		  posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
		  posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
		}
		else
		{
		  posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
		  posLenAtt = addAttribute(typeof(PositionLengthAttribute));
		}
	  }
开发者ID:paulirwin,项目名称:lucene.net,代码行数:32,代码来源:NGramTokenFilter.cs

示例7: CodepointCountFilter

 /// <summary>
 /// Initializes a new <see cref="CodepointCountFilter"/>. The filter removes tokens whose
 /// term is either too short (code point count &lt; <paramref name="min"/>) or too long
 /// (code point count &gt; <paramref name="max"/>).
 /// </summary>
 /// <param name="version">The Lucene match version; forwarded to the base constructor.</param>
 /// <param name="in">The <see cref="TokenStream"/> to consume; forwarded to the base constructor.</param>
 /// <param name="min">The minimum length; stored as given, no range check is done here.</param>
 /// <param name="max">The maximum length; stored as given, no range check is done here.</param>
 public CodepointCountFilter(LuceneVersion version, TokenStream @in, int min, int max)
     : base(version, @in)
 {
     termAtt = AddAttribute<ICharTermAttribute>();
     this.max = max;
     this.min = min;
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:15,代码来源:CodepointCountFilter.cs

示例8: NorwegianMinimalStemFilter

 /// <summary>
 /// Creates a new <see cref="NorwegianMinimalStemFilter"/>.
 /// </summary>
 /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
 /// <param name="flags">Passed to the <c>NorwegianMinimalStemmer</c> constructor; set to
 /// <c>NorwegianLightStemmer.BOKMAAL</c>, <c>NorwegianLightStemmer.NYNORSK</c>, or both.</param>
 public NorwegianMinimalStemFilter(TokenStream input, int flags)
     : base(input)
 {
     // The stemmer is built before any attribute is requested, as in the original ordering.
     stemmer = new NorwegianMinimalStemmer(flags);
     this.termAtt = AddAttribute<ICharTermAttribute>();
     this.keywordAttr = AddAttribute<IKeywordAttribute>();
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:NorwegianMinimalStemFilter.cs

示例9: CapitalizationFilter

        /// <summary>
        /// Creates a CapitalizationFilter with the specified parameters.
        /// </summary>
        /// <param name="in">Input token stream; forwarded to the base constructor.</param>
        /// <param name="onlyFirstWord">Should each word be capitalized or all of the words?</param>
        /// <param name="keep">A keep word list. Each word that should be kept separated by whitespace.</param>
        /// <param name="forceFirstLetter">Force the first letter to be capitalized even if it is in the keep list.</param>
        /// <param name="okPrefix">Do not change word capitalization if a word begins with something in this list.</param>
        /// <param name="minWordLength">How long the word needs to be to get capitalization applied. If the
        ///                      minWordLength is 3, "and" becomes "And" but "or" stays "or". Must be zero or greater.</param>
        /// <param name="maxWordCount">If the token contains more than maxWordCount words, the capitalization is
        ///                     assumed to be correct. Must be greater than zero.</param>
        /// <param name="maxTokenLength">Upper limit stored on the filter; must be greater than zero.
        ///                     How it is applied is not visible in this constructor.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="minWordLength"/> is negative, or <paramref name="maxWordCount"/> or
        /// <paramref name="maxTokenLength"/> is less than one.
        /// </exception>
        public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
            : base(@in)
        {
            // LUCENENET: The guard clauses were copied here from the version of Lucene.
            // Apparently, the tests were not ported from 4.8.0 because they expected this and the
            // original tests did not. Adding them anyway because there is no downside to this.
            //
            // The two-argument constructor is required: the single-string overload of
            // ArgumentOutOfRangeException treats its argument as the parameter name, not the message.
            if (minWordLength < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(minWordLength), "minWordLength must be greater than or equal to zero");
            }
            if (maxWordCount < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(maxWordCount), "maxWordCount must be greater than zero");
            }
            if (maxTokenLength < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(maxTokenLength), "maxTokenLength must be greater than zero");
            }

            this.onlyFirstWord = onlyFirstWord;
            this.keep = keep;
            this.forceFirstLetter = forceFirstLetter;
            this.okPrefix = okPrefix;
            this.minWordLength = minWordLength;
            this.maxWordCount = maxWordCount;
            this.maxTokenLength = maxTokenLength;
            termAtt = AddAttribute<ICharTermAttribute>();
        }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:40,代码来源:CapitalizationFilter.cs

示例10: EdgeNGramTokenFilter

        /// <summary>
        /// Creates an EdgeNGramTokenFilter over <paramref name="input"/> for n-gram sizes in the
        /// range <paramref name="minGram"/>..<paramref name="maxGram"/>.
        /// </summary>
        /// <param name="version">Lucene match version; also selects which <c>CharacterUtils</c> instance is used.</param>
        /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
        /// <param name="side">Which side the n-grams are taken from. <c>Side.BACK</c> is rejected
        /// when <paramref name="version"/> is <c>LUCENE_44</c> or later.</param>
        /// <param name="minGram">The smallest n-gram size; must be at least 1.</param>
        /// <param name="maxGram">The largest n-gram size; must not be smaller than <paramref name="minGram"/>.</param>
        /// <exception cref="System.ArgumentException">Any of the argument checks described above fails.</exception>
        public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
            : base(input)
        {
            if (version == null)
            {
              throw new System.ArgumentException("version must not be null");
            }

            // Evaluated once and reused below; the original repeated the call with Java-style
            // casing (onOrAfter), which does not match the OnOrAfter spelling used here.
            bool lucene44OrLater = version.OnOrAfter(LuceneVersion.LUCENE_44);

            if (lucene44OrLater && side == Side.BACK)
            {
              throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
            }

            if (side == null)
            {
              throw new System.ArgumentException("sideLabel must be either front or back");
            }

            if (minGram < 1)
            {
              throw new System.ArgumentException("minGram must be greater than zero");
            }

            if (minGram > maxGram)
            {
              throw new System.ArgumentException("minGram must not be greater than maxGram");
            }

            this.version = version;
            // NOTE(review): GetInstance follows the PascalCase convention of the neighbouring
            // Java4Instance member; confirm against the CharacterUtils declaration.
            this.charUtils = lucene44OrLater ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
            this.minGram = minGram;
            this.maxGram = maxGram;
            this.side = side;
        }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:34,代码来源:EdgeNGramTokenFilter.cs

示例11: GetTokenOutliningAction

        /// <summary>
        /// Looks at the next token of <paramref name="tokenStream"/> (without consuming it) and,
        /// for brace and region tokens, reports the outlining key and whether the token starts or
        /// ends an outlining node. Other tokens leave both ref arguments untouched.
        /// </summary>
        /// <param name="tokenStream">Stream whose upcoming token is inspected via <c>Peek()</c>.</param>
        /// <param name="outliningKey">Set to "CodeBlock" for curly braces or "CodeRegion" for region tokens.</param>
        /// <param name="tokenAction">Set to the start or end action that matches the token.</param>
        public override void GetTokenOutliningAction(TokenStream tokenStream, ref string outliningKey, ref OutliningNodeAction tokenAction)
        {
            Token upcoming = tokenStream.Peek();
            string key = upcoming.Key;

            bool isCurlyBrace = key == "OpenCurlyBraceToken" || key == "CloseCurlyBraceToken";

            // Curly braces produce no outlining when g.Config.b_Ed_CodeFold is false.
            if (isCurlyBrace && g.Config.b_Ed_CodeFold == false)
            {
                return;
            }

            if (key == "OpenCurlyBraceToken")
            {
                outliningKey = "CodeBlock";
                tokenAction = OutliningNodeAction.Start;
            }
            else if (key == "CloseCurlyBraceToken")
            {
                outliningKey = "CodeBlock";
                tokenAction = OutliningNodeAction.End;
            }
            else if (key == "RegionStartToken")
            {
                outliningKey = "CodeRegion";
                tokenAction = OutliningNodeAction.Start;
            }
            else if (key == "RegionEndToken")
            {
                outliningKey = "CodeRegion";
                tokenAction = OutliningNodeAction.End;
            }
        }
开发者ID:Bloodknight,项目名称:TorqueDev,代码行数:26,代码来源:CSemanticParser.cs

示例12: SnowballFilter

 /// <summary>
 /// Creates a <see cref="SnowballFilter"/> over <paramref name="input"/> that keeps a
 /// reference to the supplied stemmer.
 /// </summary>
 /// <param name="input">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
 /// <param name="stemmer">The stemmer instance stored on the filter; it is not validated here.</param>
 public SnowballFilter(TokenStream input, SnowballProgram stemmer)
     : base(input)
 {
     // Attribute lookups keep their original relative order; the stemmer store is independent.
     termAtt = AddAttribute<ICharTermAttribute>();
     keywordAttr = AddAttribute<IKeywordAttribute>();
     this.stemmer = stemmer;
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:7,代码来源:SnowballFilter.cs

示例13: TypeTokenFilter

 /// <summary>
 /// Initializes a new <see cref="TypeTokenFilter"/>.
 /// </summary>
 /// <param name="version">The Lucene match version; forwarded to the base constructor.</param>
 /// <param name="input">The <see cref="TokenStream"/> to consume; forwarded to the base constructor.</param>
 /// <param name="stopTypes">The types to filter; copied into a new <see cref="HashSet{T}"/>.</param>
 /// <param name="useWhiteList">If true, tokens whose type is in <paramref name="stopTypes"/> will
 ///                     be kept, otherwise they will be filtered out.</param>
 public TypeTokenFilter(LuceneVersion version, TokenStream input, IEnumerable<string> stopTypes, bool useWhiteList)
     : base(version, input)
 {
     // The attribute is requested before the copy, matching the original ordering
     // (the copy is the only step here that can throw).
     typeAttribute = AddAttribute<ITypeAttribute>();

     var typeSet = new HashSet<string>(stopTypes);
     this.useWhiteList = useWhiteList;
     this.stopTypes = typeSet;
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:14,代码来源:TypeTokenFilter.cs

示例14: Parse

        /// <summary>
        /// Parses the tokens supplied by <paramref name="reader"/> and returns the single
        /// expression left on the operand stack once every pending operator has been completed.
        /// </summary>
        /// <param name="reader">Token stream to parse. The code range of its next token is captured
        /// up front and used when reporting a malformed expression.</param>
        /// <returns>The expression popped from the operand stack.</returns>
        /// <exception cref="SyntaxError">
        /// The operand stack does not hold exactly one entry after parsing. In non-DEBUG builds any
        /// other exception raised while parsing is also wrapped in a <see cref="SyntaxError"/>.
        /// </exception>
        public LaxExpression Parse(TokenStream reader)
        {
            var start = reader.Next.CodeRange;
            try
            {
                ParseStream(reader);

                // Finish the stack
                while (opStack.Count > 0)
                {
                    CompleteStack();
                }

                // This is the only place the operand count needs checking: nothing touches
                // the stack between here and the Pop below, so a second check after the
                // try block could never fire.
                if (operandStack.Count != 1)
                {
                    throw new SyntaxError(start, "Expected operator");
                }
            }
            catch (SyntaxError)
            {
                // Rethrown untouched so the release-only handler below does not wrap it again.
                throw;
            }
            #if !DEBUG
            catch (Exception ex)
            {
                throw new SyntaxError(reader.Current.CodeRange, ex);
            }
            #endif

            return operandStack.Pop();
        }
开发者ID:hultqvist,项目名称:lax,代码行数:32,代码来源:ExpressionParser.cs

示例15: GermanStemFilter

 /// <summary>
 /// Builds a GermanStemFilter that uses an exclusion table.
 /// </summary>
 /// <param name="_in">The <see cref="TokenStream"/> forwarded to the base constructor.</param>
 /// <param name="exclusiontable">Set of strings stored as the filter's exclusion set; the reference is kept, not copied.</param>
 /// <param name="normalizeDin2">Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1.  This
 /// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
 /// respectively, before the DIN1 stemmer is invoked.</param>
 public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
     : base(_in)
 {
     this.exclusionSet = exclusiontable;

     // Pick the stemmer implementation from the flag.
     if (normalizeDin2)
     {
         this.stemmer = new GermanDIN2Stemmer();
     }
     else
     {
         this.stemmer = new GermanStemmer();
     }

     this.termAtt = AddAttribute<ITermAttribute>();
 }
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:15,代码来源:GermanStemFilter.cs


注:本文中的TokenStream类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。