本文整理汇总了 C# 中 SparseVector.Sort 方法的典型用法代码示例。如果您正苦于以下问题:C# SparseVector.Sort 方法的具体用法?C# SparseVector.Sort 怎么用?C# SparseVector.Sort 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 SparseVector 的用法示例。
在下文中一共展示了SparseVector.Sort方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: ProcessDocument
/// <summary>
/// Tokenizes <paramref name="document"/>, feeds a sliding window of up to
/// <c>mMaxNGramLen</c> word/stem pairs to <c>ProcessDocumentNGrams</c>, and
/// packs the resulting index/count pairs into a sparse vector.
/// </summary>
/// <param name="document">Text assigned to <c>mTokenizer.Text</c> before enumeration.</param>
/// <returns>
/// A <c>SparseVector&lt;double&gt;</c> holding one entry per key/value pair that
/// <c>ProcessDocumentNGrams</c> accumulated, on which <c>Sort()</c> has been called.
/// </returns>
private SparseVector<double> ProcessDocument(string document)
{
    Set<string> seenWords = new Set<string>();
    Dictionary<int, int> counts = new Dictionary<int, int>();
    ArrayList<WordStem> window = new ArrayList<WordStem>(mMaxNGramLen);
    mTokenizer.Text = document;
    foreach (string rawToken in mTokenizer)
    {
        string word = rawToken.Trim().ToLower();
        // Stop words never enter the window.
        if (mStopWords != null && mStopWords.Contains(word)) { continue; }
        // Without a stemmer the normalized word doubles as its own stem.
        string stem = word;
        if (mStemmer != null) { stem = mStemmer.GetStem(word).Trim().ToLower(); }
        if (window.Count >= mMaxNGramLen)
        {
            // Window is full: reuse the oldest entry for the new token,
            // shift everything one slot to the left and append it at the end.
            WordStem recycled = window[0];
            recycled.Word = word;
            recycled.Stem = stem;
            for (int pos = 1; pos < mMaxNGramLen; pos++) { window[pos - 1] = window[pos]; }
            window[mMaxNGramLen - 1] = recycled;
        }
        else
        {
            // Still filling the window; nothing is processed until it is full.
            window.Add(new WordStem { Word = word, Stem = stem });
            if (window.Count < mMaxNGramLen) { continue; }
        }
        ProcessDocumentNGrams(window, 0, counts, seenWords);
    }
    // Flush the remaining start positions. When the window filled up, position 0
    // was already handled inside the loop, so the flush starts at 1; otherwise
    // (fewer tokens than mMaxNGramLen) nothing was processed yet and it starts at 0.
    int offset = 0;
    if (window.Count == mMaxNGramLen) { offset = 1; }
    while (offset < window.Count)
    {
        ProcessDocumentNGrams(window, offset, counts, seenWords);
        offset++;
    }
    // Copy the accumulated pairs into the vector's parallel index/data lists.
    SparseVector<double> result = new SparseVector<double>();
    foreach (KeyValuePair<int, int> entry in counts)
    {
        result.InnerIdx.Add(entry.Key);
        result.InnerDat.Add(entry.Value);
    }
    // NOTE(review): Sort() presumably orders the entries by index after the
    // unordered dictionary enumeration — confirm against SparseVector.
    result.Sort();
    return result;
}
示例2: Initialize
//.........这里部分代码省略.........
{
docCount++;
mLogger.ProgressFast(this, "Initialize", "Document {0} ...", docCount, /*numSteps=*/-1);
Set<string> docWords = new Set<string>();
ArrayList<WordStem> nGrams = new ArrayList<WordStem>(mMaxNGramLen);
mTokenizer.Text = document;
foreach (string token in mTokenizer)
{
string word = token.Trim().ToLower();
if (mStopWords == null || !mStopWords.Contains(word))
{
string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
if (nGrams.Count < mMaxNGramLen)
{
WordStem wordStem = new WordStem();
wordStem.Word = word;
wordStem.Stem = stem;
nGrams.Add(wordStem);
if (nGrams.Count < mMaxNGramLen) { continue; }
}
else
{
WordStem wordStem = nGrams[0];
wordStem.Word = word;
wordStem.Stem = stem;
for (int i = 0; i < mMaxNGramLen - 1; i++) { nGrams[i] = nGrams[i + 1]; }
nGrams[mMaxNGramLen - 1] = wordStem;
}
ProcessNGramsPass1(nGrams, 0, docWords);
}
}
int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;
for (int i = startIdx; i < nGrams.Count; i++)
{
ProcessNGramsPass1(nGrams, i, docWords);
}
}
mLogger.ProgressFast(this, "Initialize", "Document {0} ...", docCount, docCount);
// determine most frequent word and n-gram forms
foreach (Word wordInfo in mWordInfo.Values)
{
int max = 0;
foreach (KeyValuePair<string, int> wordForm in wordInfo.mForms)
{
if (wordForm.Value > max)
{
max = wordForm.Value;
wordInfo.mMostFrequentForm = wordForm.Key;
}
}
}
// compute bag-of-words vectors
mLogger.Info("Initialize", "Computing bag-of-words vectors ...");
int docNum = 1;
foreach (string document in documents)
{
mLogger.ProgressFast(this, "Initialize", "Document {0} / {1} ...", docNum++, docCount);
Dictionary<int, int> tfVec = new Dictionary<int, int>();
ArrayList<WordStem> nGrams = new ArrayList<WordStem>(mMaxNGramLen);
mTokenizer.Text = document;
foreach (string token in mTokenizer)
{
string word = token.Trim().ToLower();
if (mStopWords == null || !mStopWords.Contains(word))
{
string stem = mStemmer == null ? word : mStemmer.GetStem(word).Trim().ToLower();
if (nGrams.Count < mMaxNGramLen)
{
WordStem wordStem = new WordStem();
wordStem.Word = word;
wordStem.Stem = stem;
nGrams.Add(wordStem);
if (nGrams.Count < mMaxNGramLen) { continue; }
}
else
{
WordStem wordStem = nGrams[0];
wordStem.Word = word;
wordStem.Stem = stem;
for (int i = 0; i < mMaxNGramLen - 1; i++) { nGrams[i] = nGrams[i + 1]; }
nGrams[mMaxNGramLen - 1] = wordStem;
}
ProcessNGramsPass2(nGrams, 0, tfVec);
}
}
int startIdx = nGrams.Count == mMaxNGramLen ? 1 : 0;
for (int i = startIdx; i < nGrams.Count; i++)
{
ProcessNGramsPass2(nGrams, i, tfVec);
}
SparseVector<double> docVec = new SparseVector<double>();
foreach (KeyValuePair<int, int> tfItem in tfVec)
{
docVec.InnerIdx.Add(tfItem.Key);
docVec.InnerDat.Add(tfItem.Value);
}
docVec.Sort();
mTfVectors.Add(docVec);
}
}