当前位置: 首页>>代码示例>>C#>>正文


C# IDocument.Tokenize方法代码示例

本文整理汇总了C#中IDocument.Tokenize方法的典型用法代码示例。如果您正苦于以下问题:C# IDocument.Tokenize方法的具体用法?C# IDocument.Tokenize怎么用?C# IDocument.Tokenize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在IDocument的用法示例。


在下文中一共展示了IDocument.Tokenize方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: StoreDocument

		/// <summary>
		/// Indexes a document: any data previously stored for it is removed first,
		/// then its content words, title words and keywords are handed to the connector.
		/// </summary>
		/// <param name="document">The document to index; its <c>Tokenize</c> method and <c>Title</c> are used.</param>
		/// <param name="keywords">The document keywords; <c>null</c> is treated as an empty array.</param>
		/// <param name="content">The textual content of the document.</param>
		/// <param name="state">A state object forwarded unchanged to <c>RemoveDocument</c> and <c>SaveDataForDocument</c>.</param>
		/// <returns>The value returned by the connector's <c>SaveDataForDocument</c>
		/// (described by the original documentation as the number of indexed words, including duplicates).</returns>
		/// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
		public int StoreDocument(IDocument document, string[] keywords, string content, object state) {
			if(document == null) {
				throw new ArgumentNullException("document");
			}
			if(content == null) {
				throw new ArgumentNullException("content");
			}

			// Drop whatever is currently stored for this document before re-indexing it
			RemoveDocument(document, state);

			string[] cleanKeywords = ScrewTurn.Wiki.SearchEngine.Tools.CleanupKeywords(keywords ?? new string[0]);

			// Content: tokenize, then filter out stop words
			WordInfo[] bodyWords = ScrewTurn.Wiki.SearchEngine.Tools.RemoveStopWords(
				document.Tokenize(content), stopWords);

			// Title: tokenize, filter out stop words, then re-tag every entry (in place) as a title word
			WordInfo[] headingWords = ScrewTurn.Wiki.SearchEngine.Tools.RemoveStopWords(
				document.Tokenize(document.Title), stopWords);
			for(int t = 0; t < headingWords.Length; t++) {
				WordInfo source = headingWords[t];
				headingWords[t] = new WordInfo(source.Text, source.FirstCharIndex, source.WordIndex, WordLocation.Title);
			}

			// Keywords: each one is positioned as if all keywords were laid out in sequence
			// with a single separator character between them
			WordInfo[] keywordWords = new WordInfo[cleanKeywords.Length];
			int offset = 0;
			for(int k = 0; k < keywordWords.Length; k++) {
				string keyword = cleanKeywords[k];
				keywordWords[k] = new WordInfo(keyword, (ushort)offset, (ushort)k, WordLocation.Keywords);
				offset += keyword.Length + 1;
			}

			return connector.SaveDataForDocument(document, bodyWords, headingWords, keywordWords, state);
		}
开发者ID:swallowcamel,项目名称:ScrewTurn.Wiki.Plugins.Oracle,代码行数:40,代码来源:OracleIndex.cs

示例2: StoreDocument

        /// <summary>
        /// Stores a document in the index.
        /// </summary>
        /// <param name="document">The document.</param>
        /// <param name="keywords">The document keywords, if any, an empty array or <c>null</c> otherwise.</param>
        /// <param name="content">The content of the document.</param>
        /// <param name="state">A state object that is passed to the IndexStorer SaveDate/DeleteData function.</param>
        /// <returns>The number of indexed words (including duplicates) in the document title and content.</returns>
        /// <remarks>Indexing the content of the document is <b>O(n)</b>, 
        /// where <b>n</b> is the total number of words in the document.
        /// If the specified document was already in the index, all the old occurrences
        /// are deleted from the index.</remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
        public int StoreDocument(IDocument document, string[] keywords, string content, object state)
        {
            if(document == null) throw new ArgumentNullException("document");
            if(keywords == null) keywords = new string[0];
            if(content == null) throw new ArgumentNullException("content");

            lock(this) {
                DumpedChange removeChange = RemoveDocumentInternal(document);

                if(removeChange != null) {
                    OnIndexChange(document, IndexChangeType.DocumentRemoved, removeChange, state);
                }
            }

            keywords = Tools.CleanupKeywords(keywords);

            // When the IndexStorer handles the IndexChanged event and a document is added, the storer generates a new ID and returns it
            // via the event handler, then the in-memory index is updated (the document instance is shared across all words) - the final ID
            // is generated by the actual IndexStorer implementation (SaveData properly populates the Result field in the args)

            List<DumpedWord> dw = new List<DumpedWord>(content.Length / 5);
            List<DumpedWordMapping> dm = new List<DumpedWordMapping>(content.Length / 5);
            Word tempWord = null;
            List<Word> newWords = new List<Word>(50);
            DumpedWord tempDumpedWord = null;

            int count = 0;
            uint sequentialWordId = uint.MaxValue;

            // Store content words
            WordInfo[] words = document.Tokenize(content);
            words = Tools.RemoveStopWords(words, stopWords);

            foreach(WordInfo info in words) {
                dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Content, out tempWord, out tempDumpedWord));
                if(tempDumpedWord != null && tempWord != null) {
                    dm[dm.Count - 1].WordID = sequentialWordId;
                    tempDumpedWord.ID = sequentialWordId;
                    dw.Add(tempDumpedWord);
                    tempWord.ID = sequentialWordId;
                    newWords.Add(tempWord);
                    sequentialWordId--;
                }
            }
            count += words.Length;

            // Store title words
            words = document.Tokenize(document.Title);
            words = Tools.RemoveStopWords(words, stopWords);

            foreach(WordInfo info in words) {
                dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Title, out tempWord, out tempDumpedWord));
                if(tempDumpedWord != null && tempWord != null) {
                    dm[dm.Count - 1].WordID = sequentialWordId;
                    tempDumpedWord.ID = sequentialWordId;
                    dw.Add(tempDumpedWord);
                    tempWord.ID = sequentialWordId;
                    newWords.Add(tempWord);
                    sequentialWordId--;
                }
            }
            count += words.Length;

            ushort tempCount = 0;

            // Store keywords
            for(ushort i = 0; i < (ushort)keywords.Length; i++) {
                dm.Add(StoreWord(keywords[i], document, tempCount, i, WordLocation.Keywords, out tempWord, out tempDumpedWord));
                if(tempDumpedWord != null && tempWord != null) {
                    dm[dm.Count - 1].WordID = sequentialWordId;
                    tempDumpedWord.ID = sequentialWordId;
                    dw.Add(tempDumpedWord);
                    tempWord.ID = sequentialWordId;
                    newWords.Add(tempWord);
                    sequentialWordId--;
                }
                tempCount += (ushort)(1 + keywords[i].Length);
            }
            count += keywords.Length;

            IndexStorerResult result = OnIndexChange(document, IndexChangeType.DocumentAdded,
                new DumpedChange(new DumpedDocument(document), dw, dm), state);

            // Update document ID
            if(result != null && result.DocumentID.HasValue) {
                document.ID = result.DocumentID.Value;
            }
//.........这里部分代码省略.........
开发者ID:mono,项目名称:ScrewTurnWiki,代码行数:101,代码来源:InMemoryIndexBase.cs


注:本文中的IDocument.Tokenize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。