本文整理汇总了C#中IDocument.Tokenize方法的典型用法代码示例。如果您正苦于以下问题：C# IDocument.Tokenize方法的具体用法？C# IDocument.Tokenize怎么用？C# IDocument.Tokenize使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类IDocument的用法示例。
在下文中一共展示了IDocument.Tokenize方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: StoreDocument
/// <summary>
/// Indexes a document: its content, its title and its keywords.
/// </summary>
/// <param name="document">The document to index; it also provides the tokenizer and the title.</param>
/// <param name="keywords">The keywords of the document; <c>null</c> is treated as an empty array.</param>
/// <param name="content">The textual content of the document.</param>
/// <param name="state">A state object forwarded unchanged to <c>RemoveDocument</c> and to the connector's <c>SaveDataForDocument</c>.</param>
/// <returns>The value returned by the connector's <c>SaveDataForDocument</c>
/// (the number of indexed words, including duplicates).</returns>
/// <remarks><c>RemoveDocument</c> is invoked for the document before the new data is saved.
/// Stop words are stripped from the content and from the title, but not from the keywords.</remarks>
/// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
public int StoreDocument(IDocument document, string[] keywords, string content, object state) {
    if(document == null) {
        throw new ArgumentNullException("document");
    }
    if(content == null) {
        throw new ArgumentNullException("content");
    }

    // Remove the data currently stored for this document before saving the new data
    RemoveDocument(document, state);

    string[] cleanKeywords = ScrewTurn.Wiki.SearchEngine.Tools.CleanupKeywords(keywords ?? new string[0]);

    // Content: tokenize, then strip stop words
    WordInfo[] bodyTokens = ScrewTurn.Wiki.SearchEngine.Tools.RemoveStopWords(
        document.Tokenize(content), stopWords);

    // Title: tokenize, strip stop words, then re-create every token with the Title location
    WordInfo[] titleTokens = ScrewTurn.Wiki.SearchEngine.Tools.RemoveStopWords(
        document.Tokenize(document.Title), stopWords);
    int titleIndex = 0;
    while(titleIndex < titleTokens.Length) {
        titleTokens[titleIndex] = new WordInfo(
            titleTokens[titleIndex].Text,
            titleTokens[titleIndex].FirstCharIndex,
            titleTokens[titleIndex].WordIndex,
            WordLocation.Title);
        titleIndex++;
    }

    // Keywords: each keyword starts where the previous one ended plus one character,
    // i.e. the offsets are those of the keywords joined by a single separator
    WordInfo[] keywordTokens = new WordInfo[cleanKeywords.Length];
    int charOffset = 0;
    for(int k = 0; k < keywordTokens.Length; k++) {
        string keyword = cleanKeywords[k];
        keywordTokens[k] = new WordInfo(keyword, (ushort)charOffset, (ushort)k, WordLocation.Keywords);
        charOffset += 1 + keyword.Length;
    }

    return connector.SaveDataForDocument(document, bodyTokens, titleTokens, keywordTokens, state);
}
示例2: StoreDocument
/// <summary>
/// Stores a document in the index.
/// </summary>
/// <param name="document">The document.</param>
/// <param name="keywords">The document keywords, if any, an empty array or <c>null</c> otherwise.</param>
/// <param name="content">The content of the document.</param>
/// <param name="state">A state object that is passed to the IndexStorer SaveData/DeleteData function.</param>
/// <returns>The number of indexed words (including duplicates) in the document title and content.</returns>
/// <remarks>Indexing the content of the document is <b>O(n)</b>,
/// where <b>n</b> is the total number of words in the document.
/// If the specified document was already in the index, all the old occurrences
/// are deleted from the index.</remarks>
/// <exception cref="ArgumentNullException">If <paramref name="document"/> or <paramref name="content"/> are <c>null</c>.</exception>
public int StoreDocument(IDocument document, string[] keywords, string content, object state)
{
if(document == null) throw new ArgumentNullException("document");
if(keywords == null) keywords = new string[0];
if(content == null) throw new ArgumentNullException("content");
lock(this) {
DumpedChange removeChange = RemoveDocumentInternal(document);
if(removeChange != null) {
OnIndexChange(document, IndexChangeType.DocumentRemoved, removeChange, state);
}
}
keywords = Tools.CleanupKeywords(keywords);
// When the IndexStorer handles the IndexChanged event and a document is added, the storer generates a new ID and returns it
// via the event handler, then the in-memory index is updated (the document instance is shared across all words) - the final ID
// is generated by the actual IndexStorer implementation (SaveData properly populates the Result field in the args)
List<DumpedWord> dw = new List<DumpedWord>(content.Length / 5);
List<DumpedWordMapping> dm = new List<DumpedWordMapping>(content.Length / 5);
Word tempWord = null;
List<Word> newWords = new List<Word>(50);
DumpedWord tempDumpedWord = null;
int count = 0;
uint sequentialWordId = uint.MaxValue;
// Store content words
WordInfo[] words = document.Tokenize(content);
words = Tools.RemoveStopWords(words, stopWords);
foreach(WordInfo info in words) {
dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Content, out tempWord, out tempDumpedWord));
if(tempDumpedWord != null && tempWord != null) {
dm[dm.Count - 1].WordID = sequentialWordId;
tempDumpedWord.ID = sequentialWordId;
dw.Add(tempDumpedWord);
tempWord.ID = sequentialWordId;
newWords.Add(tempWord);
sequentialWordId--;
}
}
count += words.Length;
// Store title words
words = document.Tokenize(document.Title);
words = Tools.RemoveStopWords(words, stopWords);
foreach(WordInfo info in words) {
dm.Add(StoreWord(info.Text, document, info.FirstCharIndex, info.WordIndex, WordLocation.Title, out tempWord, out tempDumpedWord));
if(tempDumpedWord != null && tempWord != null) {
dm[dm.Count - 1].WordID = sequentialWordId;
tempDumpedWord.ID = sequentialWordId;
dw.Add(tempDumpedWord);
tempWord.ID = sequentialWordId;
newWords.Add(tempWord);
sequentialWordId--;
}
}
count += words.Length;
ushort tempCount = 0;
// Store keywords
for(ushort i = 0; i < (ushort)keywords.Length; i++) {
dm.Add(StoreWord(keywords[i], document, tempCount, i, WordLocation.Keywords, out tempWord, out tempDumpedWord));
if(tempDumpedWord != null && tempWord != null) {
dm[dm.Count - 1].WordID = sequentialWordId;
tempDumpedWord.ID = sequentialWordId;
dw.Add(tempDumpedWord);
tempWord.ID = sequentialWordId;
newWords.Add(tempWord);
sequentialWordId--;
}
tempCount += (ushort)(1 + keywords[i].Length);
}
count += keywords.Length;
IndexStorerResult result = OnIndexChange(document, IndexChangeType.DocumentAdded,
new DumpedChange(new DumpedDocument(document), dw, dm), state);
// Update document ID
if(result != null && result.DocumentID.HasValue) {
document.ID = result.DocumentID.Value;
}
//.........这里部分代码省略.........