本文整理汇总了C#中Dirichlet类的典型用法代码示例。如果您正苦于以下问题:C# Dirichlet类的具体用法?C# Dirichlet怎么用?C# Dirichlet使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Dirichlet类属于其所在库的命名空间(原文未注明具体命名空间名称),在下文中一共展示了Dirichlet类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: GenerateLDAData
/// <summary>
/// Generate LDA data - returns an array of dictionaries mapping unique word index
/// to word count per document.
/// </summary>
/// <param name="trueTheta">Known Theta (per-document distributions over topics)</param>
/// <param name="truePhi">Known Phi (per-topic distributions over words)</param>
/// <param name="averageNumWords">Average number of words to sample per doc</param>
/// <returns>One sparse word-count dictionary per document</returns>
public static Dictionary<int, int>[] GenerateLDAData(Dirichlet[] trueTheta, Dirichlet[] truePhi, int averageNumWords)
{
    int numTopics = truePhi.Length;
    int numDocs = trueTheta.Length;
    // Draw one concrete probability vector from each Dirichlet.
    Vector[] topicDist = new Vector[numDocs];
    Vector[] wordDist = new Vector[numTopics];
    for (int i = 0; i < numDocs; i++)
        topicDist[i] = trueTheta[i].Sample();
    for (int i = 0; i < numTopics; i++)
        wordDist[i] = truePhi[i].Sample();
    var wordCounts = new Dictionary<int, int>[numDocs];
    for (int i = 0; i < numDocs; i++)
    {
        // Document length is Poisson-distributed around the requested average.
        int lengthOfDoc = Poisson.Sample((double)averageNumWords);
        var counts = new Dictionary<int, int>();
        for (int j = 0; j < lengthOfDoc; j++)
        {
            // Standard LDA generative step: draw a topic, then a word from that topic.
            int topic = Discrete.Sample(topicDist[i]);
            int w = Discrete.Sample(wordDist[topic]);
            // Single lookup increment (avoids the ContainsKey + indexer double lookup).
            int current;
            counts.TryGetValue(w, out current);
            counts[w] = current + 1;
        }
        wordCounts[i] = counts;
    }
    return wordCounts;
}
示例2: CanGetAlpha
public void CanGetAlpha()
{
    // A symmetric Dirichlet(0.3) in 10 dimensions must expose 0.3 in every Alpha slot.
    var dist = new Dirichlet(0.3, 10);
    var index = 0;
    while (index < 10)
    {
        Assert.AreEqual(0.3, dist.Alpha[index]);
        index++;
    }
}
示例3: CanCreateSymmetricDirichlet
public void CanCreateSymmetricDirichlet()
{
    // Construct a symmetric Dirichlet and confirm the concentration is replicated
    // across the whole Alpha vector.
    const double concentration = 0.3;
    const int dimension = 5;
    var dist = new Dirichlet(concentration, dimension);
    for (var slot = 0; slot < dimension; slot++)
    {
        Assert.AreEqual(concentration, dist.Alpha[slot]);
    }
}
示例4: LogAverageFactor
/// <summary>
/// Evidence message for EP
/// </summary>
/// <param name="sample">Incoming message from 'sample'.</param>
/// <param name="trialCount">Constant value for 'trialCount'.</param>
/// <param name="p">Incoming message from 'p'.</param>
/// <returns>Logarithm of the factor's average value across the given argument distributions</returns>
/// <remarks><para>
/// The formula for the result is <c>log(sum_(sample,p) p(sample,p) factor(sample,trialCount,p))</c>.
/// This is the Dirichlet-multinomial (Polya) marginal likelihood:
/// log[ n! * Gamma(A)/Gamma(A+n) * prod_i Gamma(x_i + a_i) / (Gamma(a_i) * x_i!) ]
/// where a_i are the pseudo-counts and A is their total.
/// </para></remarks>
public static double LogAverageFactor(IList<int> sample, int trialCount, Dirichlet p)
{
    double result = MMath.GammaLn(trialCount + 1);
    for (int i = 0; i < sample.Count; i++)
    {
        // BUGFIX: Gamma(a_i) sits in the denominator of the marginal likelihood,
        // so its log must be subtracted here (the original code added it).
        result += MMath.GammaLn(sample[i] + p.PseudoCount[i]) - MMath.GammaLn(p.PseudoCount[i])
            - MMath.GammaLn(sample[i] + 1);
    }
    result += MMath.GammaLn(p.TotalCount) - MMath.GammaLn(p.TotalCount + trialCount);
    return result;
}
示例5: CanGetAlpha
// Verifies that every element of Alpha equals the symmetric concentration parameter.
public void CanGetAlpha()
{
    Dirichlet d = new Dirichlet(0.3, 10);
    // (Removed an unused double[10] local that was declared but never read or written.)
    for (int i = 0; i < 10; i++)
    {
        Assert.AreEqual(0.3, d.Alpha[i]);
    }
}
示例6: CanCreateDirichlet
// Verifies that an asymmetric alpha vector round-trips through the constructor.
public void CanCreateDirichlet()
{
    var alpha = new double[10];
    for (var i = 0; i < 10; i++)
    {
        alpha[i] = i;
    }
    var d = new Dirichlet(alpha);
    // Check every element, not just the first five as the original test did.
    for (var i = 0; i < 10; i++)
    {
        Assert.AreEqual(i, d.Alpha[i]);
    }
}
示例7: CreateTrueThetaAndPhi
/// <summary>
/// Randomly create true theta and phi arrays
/// </summary>
/// <param name="numVocab">Vocabulary size</param>
/// <param name="numTopics">Number of topics</param>
/// <param name="numDocs">Number of documents</param>
/// <param name="averageDocLength">Average document length</param>
/// <param name="averageWordsPerTopic">Average number of unique words per topic</param>
/// <param name="trueTheta">Theta array (output)</param>
/// <param name="truePhi">Phi array (output)</param>
public static void CreateTrueThetaAndPhi(
    int numVocab, int numTopics, int numDocs, int averageDocLength, int averageWordsPerTopic,
    out Dirichlet[] trueTheta, out Dirichlet[] truePhi)
{
    truePhi = new Dirichlet[numTopics];
    for (int i=0; i < numTopics; i++)
    {
        // Start from a uniform Dirichlet, then overwrite its pseudo-counts below.
        truePhi[i] = Dirichlet.Uniform(numVocab);
        truePhi[i].PseudoCount.SetAllElementsTo(0.0);
        // Draw the number of unique words in the topic.
        int numUniqueWordsPerTopic = Poisson.Sample((double)averageWordsPerTopic);
        // Clamp to the valid range [1, numVocab].
        if (numUniqueWordsPerTopic >= numVocab) numUniqueWordsPerTopic = numVocab;
        if (numUniqueWordsPerTopic < 1) numUniqueWordsPerTopic = 1;
        // Expected occurrences of each chosen word across the whole corpus.
        double expectedRepeatOfWordInTopic =
            ((double)numDocs) * averageDocLength / numUniqueWordsPerTopic;
        // Random permutation so the topic's words are a random vocabulary subset.
        int[] shuffledWordIndices = Rand.Perm(numVocab);
        for (int j = 0; j < numUniqueWordsPerTopic; j++)
        {
            int wordIndex = shuffledWordIndices[j];
            // Draw the count for that word
            // (+1.0 keeps every selected word's pseudo-count strictly positive).
            int cnt = Poisson.Sample(expectedRepeatOfWordInTopic);
            truePhi[i].PseudoCount[wordIndex] = cnt + 1.0;
        }
    }
    trueTheta = new Dirichlet[numDocs];
    for (int i=0; i < numDocs; i++)
    {
        trueTheta[i] = Dirichlet.Uniform(numTopics);
        trueTheta[i].PseudoCount.SetAllElementsTo(0.0);
        // Draw the number of unique topics in the doc.
        int numUniqueTopicsPerDoc = Math.Min(1 + Poisson.Sample(1.0), numTopics);
        // Expected occurrences of each chosen topic within one document.
        double expectedRepeatOfTopicInDoc =
            averageDocLength / numUniqueTopicsPerDoc;
        int[] shuffledTopicIndices = Rand.Perm(numTopics);
        for (int j = 0; j < numUniqueTopicsPerDoc; j++)
        {
            int topicIndex = shuffledTopicIndices[j];
            // Draw the count for that topic
            int cnt = Poisson.Sample(expectedRepeatOfTopicInDoc);
            trueTheta[i].PseudoCount[topicIndex] = cnt + 1.0;
        }
    }
}
示例8: RunTest
/// <summary>
/// Run a single test for a single model
/// </summary>
/// <param name="sizeVocab">Size of the vocabulary</param>
/// <param name="numTopics">Number of topics</param>
/// <param name="trainWordsInTrainDoc">Lists of words in training documents used for training</param>
/// <param name="testWordsInTrainDoc">Lists of words in training documents used for testing</param>
/// <param name="alpha">Background pseudo-counts for distributions over topics</param>
/// <param name="beta">Background pseudo-counts for distributions over words</param>
/// <param name="shared">If true, uses shared variable version of the model</param>
/// <param name="trueThetaTest">The true topic distributions for the documents in the test set</param>
/// <param name="wordsInTestDoc">Lists of words in test documents</param>
/// <param name="vocabulary">Vocabulary</param>
static void RunTest(
int sizeVocab,
int numTopics,
Dictionary<int, int>[] trainWordsInTrainDoc,
Dictionary<int, int>[] testWordsInTrainDoc,
double alpha,
double beta,
bool shared,
Dirichlet[] trueThetaTest,
Dictionary<int, int>[] wordsInTestDoc,
Dictionary<int, string> vocabulary = null
)
{
Stopwatch stopWatch = new Stopwatch();
// Square root of number of documents is the optimal for memory
int batchCount = (int)Math.Sqrt((double)trainWordsInTrainDoc.Length);
Rand.Restart(5);
ILDA model;
LDAPredictionModel predictionModel;
LDATopicInferenceModel topicInfModel;
if (shared)
{
model = new LDAShared(batchCount, sizeVocab, numTopics);
((LDAShared)model).IterationsPerPass = Enumerable.Repeat(10, 5).ToArray();
}
else
{
model = new LDAModel(sizeVocab, numTopics);
model.Engine.NumberOfIterations = 50;
}
Console.WriteLine("\n\n************************************");
Console.WriteLine(
String.Format("\nTraining {0}LDA model...\n",
shared ? "batched " : "non-batched "));
// Train the model - we will also get rough estimates of execution time and memory
Dirichlet[] postTheta, postPhi;
GC.Collect();
PerformanceCounter memCounter = new PerformanceCounter("Memory", "Available MBytes");
float preMem = memCounter.NextValue();
stopWatch.Reset();
stopWatch.Start();
double logEvidence = model.Infer(trainWordsInTrainDoc, alpha, beta, out postTheta, out postPhi);
stopWatch.Stop();
float postMem = memCounter.NextValue();
double approxMB = preMem - postMem;
GC.KeepAlive(model); // Keep the model alive to this point (for the memory counter)
Console.WriteLine(String.Format("Approximate memory usage: {0:F2} MB", approxMB));
Console.WriteLine(String.Format("Approximate execution time (including model compilation): {0} seconds", stopWatch.ElapsedMilliseconds/1000));
// Calculate average log evidence over total training words
int totalWords = trainWordsInTrainDoc.Sum(doc => doc.Sum(w => w.Value));
Console.WriteLine("\nTotal number of training words = {0}", totalWords);
Console.WriteLine(String.Format("Average log evidence of model: {0:F2}", logEvidence / (double)totalWords));
if (vocabulary != null)
{
int numWordsToPrint = 20;
// Print out the top n words for each topic
for (int i = 0; i < postPhi.Length; i++)
{
double[] pc = postPhi[i].PseudoCount.ToArray();
int[] wordIndices = new int[pc.Length];
for (int j=0; j < wordIndices.Length; j++)
wordIndices[j] = j;
Array.Sort(pc, wordIndices);
Console.WriteLine("Top {0} words in topic {1}:", numWordsToPrint, i);
int idx = wordIndices.Length;
for (int j = 0; j < numWordsToPrint; j++)
Console.Write("\t{0}", vocabulary[wordIndices[--idx]]);
Console.WriteLine();
}
}
if (testWordsInTrainDoc != null)
{
// Test on unseen words in training documents
Console.WriteLine("\n\nCalculating perplexity on test words in training documents...");
predictionModel = new LDAPredictionModel(sizeVocab, numTopics);
predictionModel.Engine.NumberOfIterations = 5;
var predDist = predictionModel.Predict(postTheta, postPhi);
var perplexity = Utilities.Perplexity(predDist, testWordsInTrainDoc);
Console.WriteLine(String.Format("\nPerplexity = {0:F3}", perplexity));
}
if (wordsInTestDoc != null)
//.........这里部分代码省略.........
示例9: AttachData
/// <summary>
/// Attaches the data to the workers' labels and sets the workers' confusion matrix priors.
/// </summary>
/// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
/// <param name="workerLabels">The matrix of the labels (columns) of each worker (rows).</param>
/// <param name="confusionMatrixPrior">The workers' confusion matrix priors; may be null to leave the prior unset.</param>
protected virtual void AttachData(int[][] taskIndices, int[][] workerLabels, Dirichlet[][] confusionMatrixPrior)
{
    int numClasses = c.SizeAsInt;
    // Observe the worker/task structure of the model.
    WorkerCount.ObservedValue = taskIndices.Length;
    WorkerTaskCount.ObservedValue = taskIndices.Select(tasks => tasks.Length).ToArray();
    WorkerTaskIndex.ObservedValue = taskIndices;
    // Prediction mode is indicated by none of the workers having a label.
    // We can just look at the first one
    if (workerLabels[0] != null)
    {
        WorkerLabel.ObservedValue = workerLabels;
    }
    else
    {
        // No labels supplied: clear any previous observation so labels are inferred.
        WorkerLabel.ClearObservedValue();
    }
    if (confusionMatrixPrior != null)
    {
        // Copy the jagged prior into a freshly initialized [worker][class] array.
        ConfusionMatrixPrior.ObservedValue = Util.ArrayInit(confusionMatrixPrior.Length, worker => Util.ArrayInit(numClasses, lab => confusionMatrixPrior[worker][lab]));
    }
}
示例10: MeanAverageLogarithm
/// <summary>
/// VMP message to 'mean'
/// </summary>
/// <param name="mean">Incoming message from 'mean'. Must be a proper distribution. If any element is uniform, the result will be uniform.</param>
/// <param name="totalCount">Constant value for 'totalCount'.</param>
/// <param name="prob">Incoming message from 'prob'. Must be a proper distribution. If any element is uniform, the result will be uniform.</param>
/// <param name="to_mean">Previous outgoing message to 'mean'.</param>
/// <returns>The outgoing VMP message to the 'mean' argument</returns>
/// <remarks><para>
/// The outgoing message is the exponential of the average log-factor value, where the average is over all arguments except 'mean'.
/// The formula is <c>exp(sum_(prob) p(prob) log(factor(prob,mean,totalCount)))</c>.
/// </para></remarks>
/// <exception cref="ImproperMessageException"><paramref name="mean"/> is not a proper distribution</exception>
/// <exception cref="ImproperMessageException"><paramref name="prob"/> is not a proper distribution</exception>
public static Dirichlet MeanAverageLogarithm([Proper] Dirichlet mean, double totalCount, [SkipIfUniform] Dirichlet prob, Dirichlet to_mean)
{
    // Delegate to the Gamma-distributed overload, wrapping the fixed
    // totalCount as a point-mass Gamma distribution.
    Gamma totalCountAsGamma = Gamma.PointMass(totalCount);
    return MeanAverageLogarithm(mean, totalCountAsGamma, prob, to_mean);
}
示例11: ProbAverageConditional
/// <summary>
/// EP message to 'prob'. Not supported: this operator always throws.
/// </summary>
/// <param name="alpha">Incoming message from 'alpha'.</param>
/// <param name="result">Modified to contain the outgoing message (unused here).</param>
/// <exception cref="NotSupportedException">Always thrown; see <c>NotSupportedMessage</c> for the reason.</exception>
public static Dirichlet ProbAverageConditional([SkipIfUniform] Gamma alpha, Dirichlet result)
{
    // This EP message has no supported implementation for a Gamma-distributed alpha.
    throw new NotSupportedException(NotSupportedMessage);
}
示例12: LogEvidenceRatio
/// <summary>
/// Evidence message for EP. This factor contributes no evidence correction,
/// so the log-ratio is always zero.
/// </summary>
/// <param name="prob">Incoming message from 'prob' (unused).</param>
/// <param name="alpha">Constant value for 'alpha' (unused).</param>
/// <returns>Always 0.0.</returns>
public static double LogEvidenceRatio(Dirichlet prob, double alpha)
{
    return 0.0;
}
示例13: AverageLogFactor
/// <summary>
/// Evidence message for VMP
/// </summary>
/// <param name="sample">Incoming message from 'sampleFromPseudoCounts'.</param>
/// <param name="pseudoCounts">Constant value for 'pseudoCount' (not used directly; already folded into <paramref name="to_sample"/>).</param>
/// <param name="to_sample">Outgoing message to 'sample'.</param>
/// <returns>Average of the factor's log-value across the given argument distributions</returns>
/// <remarks><para>
/// The formula for the result is <c>sum_(sampleFromPseudoCounts) p(sampleFromPseudoCounts) log(factor(sampleFromPseudoCounts,pseudoCount))</c>.
/// Adding up these values across all factors and variables gives the log-evidence estimate for VMP.
/// </para></remarks>
public static double AverageLogFactor(Dirichlet sample, Vector pseudoCounts, [Fresh] Dirichlet to_sample)
{
    // Delegate entirely to the freshly computed outgoing message.
    double averageLog = to_sample.GetAverageLog(sample);
    return averageLog;
}
示例14: ValidateVariance
public void ValidateVariance()
{
    // Build alpha = [0, 1, ..., 9] and accumulate its total concentration.
    const int dimension = 10;
    var alpha = new double[dimension];
    var total = 0.0;
    for (var k = 0; k < dimension; k++)
    {
        alpha[k] = k;
        total += k;
    }
    var dirichlet = new Dirichlet(alpha);
    // Dirichlet variance: a_k * (A - a_k) / (A^2 * (A + 1)) with A = sum of alphas.
    for (var k = 0; k < dimension; k++)
    {
        var expected = k * (total - k) / (total * total * (total + 1.0));
        AssertHelpers.AlmostEqual(expected, dirichlet.Variance[k], 15);
    }
}
示例15: ValidateMean
public void ValidateMean()
{
    // Symmetric Dirichlet(0.3) in 5 dimensions: each mean is 0.3 / (5 * 0.3) = 0.2.
    var dirichlet = new Dirichlet(0.3, 5);
    var expectedMean = 0.3 / 1.5;
    for (var index = 0; index < 5; index++)
    {
        AssertHelpers.AlmostEqual(expectedMean, dirichlet.Mean[index], 15);
    }
}