当前位置: 首页>>代码示例>>C#>>正文


C# SparseVector.Sort方法代码示例

本文整理汇总了C#中SparseVector.Sort方法的典型用法代码示例。如果您正苦于以下问题:C# SparseVector.Sort方法的具体用法?C# SparseVector.Sort怎么用?C# SparseVector.Sort使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SparseVector的用法示例。


在下文中一共展示了SparseVector.Sort方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: ProcessDocument

 /// <summary>
 /// Tokenizes <paramref name="document"/>, feeds every sliding window of up to
 /// <c>mMaxNGramLen</c> non-stop-word tokens to <c>ProcessDocumentNGrams</c>, and
 /// packs the resulting index/count pairs into a sorted sparse vector.
 /// </summary>
 /// <param name="document">Raw text handed to <c>mTokenizer</c>.</param>
 /// <returns>
 /// A sparse vector holding one entry per key of the term-count dictionary,
 /// sorted via <c>SparseVector.Sort</c>.
 /// </returns>
 private SparseVector<double> ProcessDocument(string document)
 {
     Set<string> seenWords = new Set<string>();
     Dictionary<int, int> termCounts = new Dictionary<int, int>();
     ArrayList<WordStem> window = new ArrayList<WordStem>(mMaxNGramLen);
     mTokenizer.Text = document;
     foreach (string rawToken in mTokenizer)
     {
         string word = rawToken.Trim().ToLower();
         // Stop words never enter the n-gram window.
         if (mStopWords != null && mStopWords.Contains(word))
         {
             continue;
         }
         // Without a stemmer the normalized word doubles as its own stem.
         string stem;
         if (mStemmer == null)
         {
             stem = word;
         }
         else
         {
             stem = mStemmer.GetStem(word).Trim().ToLower();
         }
         if (window.Count >= mMaxNGramLen)
         {
             // Window is full: recycle the oldest entry, shift the rest one
             // slot to the left and store the recycled entry in the last slot.
             WordStem recycled = window[0];
             recycled.Word = word;
             recycled.Stem = stem;
             for (int slot = 1; slot < mMaxNGramLen; slot++)
             {
                 window[slot - 1] = window[slot];
             }
             window[mMaxNGramLen - 1] = recycled;
         }
         else
         {
             // Window is still filling up: append a fresh entry.
             WordStem entry = new WordStem();
             entry.Word = word;
             entry.Stem = stem;
             window.Add(entry);
             if (window.Count < mMaxNGramLen)
             {
                 // Not enough tokens collected yet to process a full window.
                 continue;
             }
         }
         ProcessDocumentNGrams(window, 0, termCounts, seenWords);
     }
     // Flush the tail of the window. Offset 0 was already processed inside the
     // loop when the window reached full length, so start at 1 in that case.
     int firstPending = 0;
     if (window.Count == mMaxNGramLen)
     {
         firstPending = 1;
     }
     for (int offset = firstPending; offset < window.Count; offset++)
     {
         ProcessDocumentNGrams(window, offset, termCounts, seenWords);
     }
     // Copy the (index, count) pairs into the sparse vector's backing lists,
     // then sort the vector.
     SparseVector<double> result = new SparseVector<double>();
     foreach (KeyValuePair<int, int> pair in termCounts)
     {
         result.InnerIdx.Add(pair.Key);
         result.InnerDat.Add(pair.Value);
     }
     result.Sort();
     return result;
 }
开发者ID:viidea,项目名称:latino,代码行数:45,代码来源:IncrementalBowSpace.cs

示例2: Initialize


//.........这里部分代码省略.........
     {
         docCount++;
         mLogger.ProgressFast(this, "Initialize", "Document {0} ...", docCount, /*numSteps=*/-1);
         Set<string> docWords = new Set<string>();
         ArrayList<WordStem> nGrams = new ArrayList<WordStem>(mMaxNGramLen);
         mTokenizer.Text = document;
         foreach (string token in mTokenizer)
         {
             string word = token.Trim().ToLower();
             if (mStopWords == null || !mStopWords.Contains(word))
             {
                 string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
                 if (nGrams.Count < mMaxNGramLen)
                 {
                     WordStem wordStem = new WordStem();
                     wordStem.Word = word;
                     wordStem.Stem = stem;
                     nGrams.Add(wordStem);
                     if (nGrams.Count < mMaxNGramLen) { continue; }
                 }
                 else
                 {
                     WordStem wordStem = nGrams[0];
                     wordStem.Word = word;
                     wordStem.Stem = stem;
                     for (int i = 0; i < mMaxNGramLen - 1; i++) { nGrams[i] = nGrams[i + 1]; }
                     nGrams[mMaxNGramLen - 1] = wordStem;
                 }
                 ProcessNGramsPass1(nGrams, 0, docWords);
             }
         }
         int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;
         for (int i = startIdx; i < nGrams.Count; i++)
         {
             ProcessNGramsPass1(nGrams, i, docWords);
         }
     }
     mLogger.ProgressFast(this, "Initialize", "Document {0} ...", docCount, docCount);
     // determine most frequent word and n-gram forms
     foreach (Word wordInfo in mWordInfo.Values)
     {
         int max = 0;
         foreach (KeyValuePair<string, int> wordForm in wordInfo.mForms)
         {
             if (wordForm.Value > max)
             {
                 max = wordForm.Value;
                 wordInfo.mMostFrequentForm = wordForm.Key;
             }
         }
     }
     // compute bag-of-words vectors
     mLogger.Info("Initialize", "Computing bag-of-words vectors ...");
     int docNum = 1;
     foreach (string document in documents)
     {
         mLogger.ProgressFast(this, "Initialize", "Document {0} / {1} ...", docNum++, docCount);
         Dictionary<int, int> tfVec = new Dictionary<int, int>();
         ArrayList<WordStem> nGrams = new ArrayList<WordStem>(mMaxNGramLen);
         mTokenizer.Text = document;
         foreach (string token in mTokenizer)
         {
             string word = token.Trim().ToLower();
             if (mStopWords == null || !mStopWords.Contains(word))
             {
                 string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
                 if (nGrams.Count < mMaxNGramLen)
                 {
                     WordStem wordStem = new WordStem();
                     wordStem.Word = word;
                     wordStem.Stem = stem;
                     nGrams.Add(wordStem);
                     if (nGrams.Count < mMaxNGramLen) { continue; }
                 }
                 else
                 {
                     WordStem wordStem = nGrams[0];
                     wordStem.Word = word;
                     wordStem.Stem = stem;
                     for (int i = 0; i < mMaxNGramLen - 1; i++) { nGrams[i] = nGrams[i + 1]; }
                     nGrams[mMaxNGramLen - 1] = wordStem;
                 }
                 ProcessNGramsPass2(nGrams, 0, tfVec);
             }
         }
         int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;
         for (int i = startIdx; i < nGrams.Count; i++)
         {
             ProcessNGramsPass2(nGrams, i, tfVec);
         }
         SparseVector<double> docVec = new SparseVector<double>();
         foreach (KeyValuePair<int, int> tfItem in tfVec)
         {
             docVec.InnerIdx.Add(tfItem.Key);
             docVec.InnerDat.Add(tfItem.Value);
         }
         docVec.Sort();
         mTfVectors.Add(docVec);
     }
 }
开发者ID:viidea,项目名称:latino,代码行数:101,代码来源:IncrementalBowSpace.cs


注:本文中的SparseVector.Sort方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。