本文整理汇总了Golang中github.com/shuLhan/tabula.ClasetInterface.GetClassValueSpace方法的典型用法代码示例。如果您正苦于以下问题:Golang ClasetInterface.GetClassValueSpace方法的具体用法?Golang ClasetInterface.GetClassValueSpace怎么用?Golang ClasetInterface.GetClassValueSpace使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类github.com/shuLhan/tabula.ClasetInterface的用法示例。
在下文中一共展示了ClasetInterface.GetClassValueSpace方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: ClassifySet
//
// ClassifySet predicts the class of each row in `samples` by running the row
// through the forest, and returns the predicted classes, the confusion
// matrix, and one probability value per row.
//
// `sampleIds`, when not empty, holds the index of each row of `samples`; the
// index is handed to `forest.Votes` together with the row (per the original
// notes, trees that used that sample for training do not vote). When it is
// empty, -1 is passed as the index instead.
//
// Steps,
//
// (0) Get the class value space and the actual classes of the samples.
// (1) For each row,
// (1.1) collect the votes of the forest,
// (1.2) predict the class with the maximum probability among the votes, and
// (1.3) save the probability of the first class in the value space.
// (2) Compute the confusion matrix from the predictions.
// (3) Compute the stat from the confusion matrix.
// (4) Only when `sampleIds` is empty: print the confusion matrix and the
// stat, and write the stat to `forest.StatFile`.
//
func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	// Sample ids are optional; without them the run is reported verbosely.
	hasIds := len(sampleIds) > 0

	if !hasIds {
		fmt.Println(tag, "Classify set:", samples)
		fmt.Println(tag, "Classify set sample (one row):",
			samples.GetRow(0))
	}

	// (0)
	vs := samples.GetClassValueSpace()
	actuals := samples.GetClassAsStrings()

	// (1)
	rows := samples.GetRows()
	for x, row := range *rows {
		// (1.1) -1 is used as the sample index when no ids were given.
		sampleIdx := -1
		if hasIds {
			sampleIdx = sampleIds[x]
		}

		votes := forest.Votes(row, sampleIdx)

		// (1.2)
		voteProbs := tekstus.WordsProbabilitiesOf(votes, vs, false)

		_, maxIdx, found := numerus.Floats64FindMax(voteProbs)
		if found {
			predicts = append(predicts, vs[maxIdx])
		}

		// (1.3) only the probability of the first class is kept.
		probs = append(probs, voteProbs[0])
	}

	// (2)
	cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)

	// (3)
	forest.ComputeStatFromCM(&stat, cm)
	stat.End()

	// (4)
	if !hasIds {
		fmt.Println(tag, "CM:", cm)
		fmt.Println(tag, "Classifying stat:", stat)
		_ = stat.Write(forest.StatFile)
	}

	return predicts, cm, probs
}
示例2: computePerfByProbs
//
// computePerfByProbs computes the classifier performance from the
// probabilities (or scores) in `probs` and appends one stat to `rt.perfs`
// every time the probability value changes, plus one final stat after the
// last probability.
//
// `actuals[x]` is the actual class of the sample that produced `probs[x]`.
// A sample whose actual class equals the first value in the class value
// space is counted as true positive, any other as false positive.
//
// This currently only works for two-class problems: only index 0 and 1 of
// the class counts are used.
//
func (rt *Runtime) computePerfByProbs(samples tabula.ClasetInterface,
	actuals []string, probs []float64,
) {
	vs := samples.GetClassValueSpace()
	nactuals := numerus.IntsTo64(samples.Counts())
	nclass := tekstus.WordsCountTokens(actuals, vs, false)

	var (
		tp, fp         int64
		tpprev, fpprev int64
		auc            float64
	)
	pprev := math.Inf(-1)

	// record appends a stat built from the current counters to rt.perfs.
	// The running AUC is extended by the trapezoid between the previous and
	// the current point; on the final call it is additionally divided by
	// the product of the two class counts.
	record := func(final bool) {
		stat := Stat{}

		stat.SetTPRate(tp, nactuals[0])
		stat.SetFPRate(fp, nactuals[1])
		stat.SetPrecisionFromRate(nactuals[0], nactuals[1])

		auc += trapezoidArea(fp, fpprev, tp, tpprev)
		if final {
			auc /= float64(nclass[0] * nclass[1])
		}
		stat.SetAUC(auc)

		rt.perfs = append(rt.perfs, &stat)
	}

	for x, p := range probs {
		// A new probability value starts a new point.
		if p != pprev {
			record(false)
			pprev, tpprev, fpprev = p, tp, fp
		}

		if actuals[x] != vs[0] {
			fp++
			continue
		}
		tp++
	}

	record(true)

	if len(rt.perfs) >= 2 {
		// Replace the first stat with second stat, because of NaN
		// value on the first precision.
		rt.perfs[0] = rt.perfs[1]
	}
}
示例3: computeGain
/*
computeGain calculates the gini index for each value in each attribute
(column) of dataset `D` and returns the result indexed by column.

The result is only allocated, with one entry per column, when the split method
is SplitMethodGini. The entry of the class column is left untouched, and the
entries of columns flagged as parent or skip are only marked with `Skip`.
*/
func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
	gains []gini.Gini,
) {
	if runtime.SplitMethod == SplitMethodGini {
		// create gains value for all attribute minus target class.
		gains = make([]gini.Gini, D.GetNColumn())
	}

	runtime.SelectRandomFeature(D)

	classVS := D.GetClassValueSpace()
	classIdx := D.GetClassIndex()
	classType := D.GetClassType()

	columns := D.GetColumns()
	for x, col := range *columns {
		// the class attribute has no gain.
		if x == classIdx {
			continue
		}

		// columns flagged with parent or with skip are not computed.
		isParent := (col.Flag & ColFlagParent) == ColFlagParent
		isSkipped := (col.Flag & ColFlagSkip) == ColFlagSkip
		if isParent || isSkipped {
			gains[x].Skip = true
			continue
		}

		// compute gain, depending on the type of attribute and class.
		switch {
		case col.GetType() != tabula.TReal:
			// discrete attribute.
			attr := col.ToStringSlice()
			attrV := col.ValueSpace

			if DEBUG >= 2 {
				fmt.Println("[cart] attr :", attr)
				fmt.Println("[cart] attrV:", attrV)
			}

			target := D.GetClassAsStrings()
			gains[x].ComputeDiscrete(&attr, &attrV, &target,
				&classVS)

		case classType == tabula.TString:
			// continuous attribute with string class.
			attr := col.ToFloatSlice()
			target := D.GetClassAsStrings()
			gains[x].ComputeContinu(&attr, &target, &classVS)

		default:
			// continuous attribute with non-string class; the class
			// values are converted to float64.
			attr := col.ToFloatSlice()
			targetReal := D.GetClassAsReals()
			classVSReal := tekstus.StringsToFloat64(classVS)
			gains[x].ComputeContinuFloat(&attr, &targetReal,
				&classVSReal)
		}

		if DEBUG >= 2 {
			fmt.Println("[cart] gain :", gains[x])
		}
	}

	return gains
}
示例4: ClassifySetByWeight
//
// ClassifySetByWeight will classify each instance in samples by weight
// with respect to its single performance.
//
// It returns the predicted class of each instance, the confusion matrix of
// the predictions against the actual classes, and, for each instance, the
// average over all stages of the probability of the first class in the value
// space. The classification stat is written to `crf.StatFile`.
//
// Algorithm,
// (1) For each instance in samples,
// (1.0) reset the per-instance accumulators,
// (1.1) for each stage,
// (1.1.1) collect votes for instance in current stage.
// (1.1.2) Compute probabilities of each classes in votes.
//
// prob_class = count_of_class / total_votes
//
// (1.1.3) Compute total of probabilities times of stage weight.
//
// stage_prob = prob_class * stage_weight
//
// (1.2) Divide each class stage probabilities with
//
// stage_prob = stage_prob /
// (sum_of_all_weights * number_of_tree_in_forest)
//
// (1.3) Select class label with highest probabilities.
// (1.4) Save stage probabilities for positive class.
// (2) Compute confusion matrix.
//
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	vs := samples.GetClassValueSpace()
	stageProbs := make([]float64, len(vs))
	stageSumProbs := make([]float64, len(vs))
	sumWeights := numerus.Floats64Sum(crf.weights)

	// The divisor of step (1.2) is the same for every instance.
	stageWeight := sumWeights * float64(crf.NTree)

	// (1)
	rows := samples.GetDataAsRows()
	for _, row := range *rows {
		// (1.0) Both accumulators are per-instance. Without resetting
		// stageProbs, the weighted probabilities of the previous
		// instances would leak into the prediction of this one.
		for y := range stageSumProbs {
			stageSumProbs[y] = 0
			stageProbs[y] = 0
		}

		// (1.1)
		for y, forest := range crf.forests {
			// (1.1.1)
			votes := forest.Votes(row, -1)

			// (1.1.2)
			classProbs := tekstus.WordsProbabilitiesOf(votes, vs,
				false)

			// (1.1.3)
			for z := range classProbs {
				stageSumProbs[z] += classProbs[z]
				stageProbs[z] += classProbs[z] * crf.weights[y]
			}
		}

		// (1.2)
		for x := range stageProbs {
			stageProbs[x] = stageProbs[x] / stageWeight
		}

		// (1.3)
		_, maxi, ok := numerus.Floats64FindMax(stageProbs)
		if ok {
			predicts = append(predicts, vs[maxi])
		}

		// (1.4) unweighted average of the first class probability.
		probs = append(probs, stageSumProbs[0]/
			float64(len(crf.forests)))
	}

	// (2)
	actuals := samples.GetClassAsStrings()
	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)

	crf.ComputeStatFromCM(&stat, cm)
	stat.End()

	// Writing the stat is best-effort; a failure does not affect the
	// returned predictions.
	_ = stat.Write(crf.StatFile)

	return predicts, cm, probs
}