This page collects typical usage examples of the Golang method github.com/shuLhan/tabula.ClasetInterface.GetClassAsStrings. If you have been wondering what ClasetInterface.GetClassAsStrings does, how to use it, and what real usage looks like, the curated code examples below may help. You can also explore further usage of the parent type, github.com/shuLhan/tabula.ClasetInterface.
Below are 5 code examples of the ClasetInterface.GetClassAsStrings method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Golang code examples.
Example 1: ClassifySet
//
// ClassifySet, given samples, predicts their class by running each sample
// through the forest, and returns the class predictions along with a
// confusion matrix.
// `samples` are the samples to be predicted; `sampleIds` holds the indices
// of the samples.
// If `sampleIds` is not nil, the sample index is checked in each tree; if
// the sample was used for training, its vote is not counted.
//
// Algorithm,
//
// (0) Get the value space (possible class values in the dataset).
// (1) For each row in the test set,
//     (1.1) collect votes in all trees,
//     (1.2) select the majority class vote, and
//     (1.3) compute and save the class probabilities.
// (2) Compute the confusion matrix from the predictions.
// (3) Compute stats from the confusion matrix.
// (4) Write the stats to file only if sampleIds is empty, which means the
//     run is not from the OOB set.
//
func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	if len(sampleIds) <= 0 {
		fmt.Println(tag, "Classify set:", samples)
		fmt.Println(tag, "Classify set sample (one row):",
			samples.GetRow(0))
	}

	// (0)
	vs := samples.GetClassValueSpace()
	actuals := samples.GetClassAsStrings()
	sampleIdx := -1

	// (1)
	rows := samples.GetRows()
	for x, row := range *rows {
		// (1.1)
		if len(sampleIds) > 0 {
			sampleIdx = sampleIds[x]
		}

		votes := forest.Votes(row, sampleIdx)

		// (1.2)
		classProbs := tekstus.WordsProbabilitiesOf(votes, vs, false)

		_, idx, ok := numerus.Floats64FindMax(classProbs)
		if ok {
			predicts = append(predicts, vs[idx])
		}

		// (1.3)
		probs = append(probs, classProbs[0])
	}

	// (2)
	cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)

	// (3)
	forest.ComputeStatFromCM(&stat, cm)
	stat.End()

	if len(sampleIds) <= 0 {
		fmt.Println(tag, "CM:", cm)
		fmt.Println(tag, "Classifying stat:", stat)
		_ = stat.Write(forest.StatFile)
	}

	return predicts, cm, probs
}
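For intuition, the majority-vote step (1.2) can be sketched without the tekstus and numerus helpers. The following is a minimal, self-contained approximation; majorityVote and its inputs are illustrative names, not part of the tabula API.

package main

import "fmt"

// majorityVote returns the class with the largest share of votes, along
// with the per-class probabilities, mirroring step (1.2) of ClassifySet.
func majorityVote(votes, valueSpace []string) (string, []float64) {
	counts := make(map[string]int, len(valueSpace))
	for _, v := range votes {
		counts[v]++
	}

	probs := make([]float64, len(valueSpace))
	best, bestIdx := -1.0, 0
	for i, class := range valueSpace {
		probs[i] = float64(counts[class]) / float64(len(votes))
		if probs[i] > best {
			best, bestIdx = probs[i], i
		}
	}
	return valueSpace[bestIdx], probs
}

func main() {
	votes := []string{"yes", "no", "yes", "yes", "no"}
	class, probs := majorityVote(votes, []string{"yes", "no"})
	fmt.Println(class, probs) // yes [0.6 0.4]
}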
Example 2: Performance
//
// Performance, given the actual class labels and their probabilities,
// computes the performance statistics of the classifier.
//
// Algorithm,
// (1) Sort the probabilities in descending order.
// (2) Sort the actuals and predicts using the sorted index from probs.
// (3) Compute tpr, fpr, and precision.
// (4) Write the performance to file.
//
func (rt *Runtime) Performance(samples tabula.ClasetInterface,
	predicts []string, probs []float64,
) (
	perfs Stats,
) {
	// (1)
	actuals := samples.GetClassAsStrings()
	sortedIds := numerus.IntCreateSeq(0, len(probs)-1)
	numerus.Floats64InplaceMergesort(probs, sortedIds, 0, len(probs),
		false)

	// (2)
	tekstus.StringsSortByIndex(&actuals, sortedIds)
	tekstus.StringsSortByIndex(&predicts, sortedIds)

	// (3)
	rt.computePerfByProbs(samples, actuals, probs)

	return rt.perfs
}
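The descending sort-by-probability pairing in steps (1) and (2) can also be expressed with the standard library alone. This is an illustrative sketch under that assumption, not the numerus/tekstus implementation:

package main

import (
	"fmt"
	"sort"
)

func main() {
	probs := []float64{0.2, 0.9, 0.5}
	actuals := []string{"no", "yes", "yes"}
	predicts := []string{"no", "yes", "no"}

	// Build an index permutation sorted by descending probability,
	// then apply it to every parallel slice.
	idx := make([]int, len(probs))
	for i := range idx {
		idx[i] = i
	}
	sort.Slice(idx, func(i, j int) bool {
		return probs[idx[i]] > probs[idx[j]]
	})

	sortedActuals := make([]string, len(idx))
	sortedPredicts := make([]string, len(idx))
	for to, from := range idx {
		sortedActuals[to] = actuals[from]
		sortedPredicts[to] = predicts[from]
	}
	fmt.Println(sortedActuals, sortedPredicts) // [yes yes no] [yes no no]
}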
Example 3: computeGain
/*
computeGain calculates the Gini index for each value in each attribute.
*/
func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
	gains []gini.Gini,
) {
	switch runtime.SplitMethod {
	case SplitMethodGini:
		// Create gain values for all attributes minus the target
		// class.
		gains = make([]gini.Gini, D.GetNColumn())
	}

	runtime.SelectRandomFeature(D)

	classVS := D.GetClassValueSpace()
	classIdx := D.GetClassIndex()
	classType := D.GetClassType()

	for x, col := range *D.GetColumns() {
		// Skip the class attribute.
		if x == classIdx {
			continue
		}

		// Skip columns flagged with parent.
		if (col.Flag & ColFlagParent) == ColFlagParent {
			gains[x].Skip = true
			continue
		}

		// Ignore columns flagged with skip.
		if (col.Flag & ColFlagSkip) == ColFlagSkip {
			gains[x].Skip = true
			continue
		}

		// Compute the gain.
		if col.GetType() == tabula.TReal {
			attr := col.ToFloatSlice()

			if classType == tabula.TString {
				target := D.GetClassAsStrings()
				gains[x].ComputeContinu(&attr, &target,
					&classVS)
			} else {
				targetReal := D.GetClassAsReals()
				classVSReal := tekstus.StringsToFloat64(
					classVS)

				gains[x].ComputeContinuFloat(&attr,
					&targetReal, &classVSReal)
			}
		} else {
			attr := col.ToStringSlice()
			attrV := col.ValueSpace

			if DEBUG >= 2 {
				fmt.Println("[cart] attr :", attr)
				fmt.Println("[cart] attrV:", attrV)
			}

			target := D.GetClassAsStrings()

			gains[x].ComputeDiscrete(&attr, &attrV, &target,
				&classVS)
		}

		if DEBUG >= 2 {
			fmt.Println("[cart] gain :", gains[x])
		}
	}
	return
}
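For reference, the Gini index behind these gain computations is one minus the sum of squared class probabilities. The sketch below computes it for a string-labeled target; giniIndex is an illustrative helper, not the gini package API.

package main

import "fmt"

// giniIndex computes 1 - sum(p_c^2) over the class probabilities of target.
func giniIndex(target []string) float64 {
	counts := make(map[string]int)
	for _, t := range target {
		counts[t]++
	}

	n := float64(len(target))
	sum := 0.0
	for _, c := range counts {
		p := float64(c) / n
		sum += p * p
	}
	return 1 - sum
}

func main() {
	fmt.Println(giniIndex([]string{"yes", "yes", "no", "no"})) // 0.5
	fmt.Println(giniIndex([]string{"yes", "yes", "yes"}))      // 0 (pure node)
}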
Example 4: splitTreeByGain
/*
splitTreeByGain calculates the gain over the whole dataset and splits it
into two nodes: left and right.
It returns the node with the split information.
*/
func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
	node *binary.BTNode,
	e error,
) {
	node = &binary.BTNode{}

	D.RecountMajorMinor()

	// If the dataset is empty, return a leaf node labeled with the
	// majority class of the dataset.
	nrow := D.GetNRow()
	if nrow <= 0 {
		if DEBUG >= 2 {
			fmt.Printf("[cart] empty dataset (%s) : %v\n",
				D.MajorityClass(), D)
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  D.MajorityClass(),
			Size:   0,
		}
		return node, nil
	}

	// If the whole dataset is in a single class, return a leaf node with
	// its class set to that class.
	single, name := D.IsInSingleClass()
	if single {
		if DEBUG >= 2 {
			fmt.Printf("[cart] in single class (%s): %v\n", name,
				D.GetColumns())
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  name,
			Size:   nrow,
		}
		return node, nil
	}

	if DEBUG >= 2 {
		fmt.Println("[cart] D:", D)
	}

	// Calculate the Gini gain for each attribute.
	gains := runtime.computeGain(D)

	// Get the attribute with the maximum Gini gain.
	MaxGainIdx := gini.FindMaxGain(&gains)
	MaxGain := gains[MaxGainIdx]

	// If the max gain value is 0, use the majority class as the node and
	// terminate the process.
	if MaxGain.GetMaxGainValue() == 0 {
		if DEBUG >= 2 {
			fmt.Println("[cart] max gain 0 with target",
				D.GetClassAsStrings(),
				" and majority class is ", D.MajorityClass())
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  D.MajorityClass(),
			Size:   0,
		}
		return node, nil
	}

	// Using the sorted index in MaxGain, sort all fields in the dataset.
	tabula.SortColumnsByIndex(D, MaxGain.SortedIndex)

	if DEBUG >= 2 {
		fmt.Println("[cart] maxgain:", MaxGain)
	}

	// Now that we have the attribute with the max gain in MaxGainIdx, and
	// its gain and partition value in Gains[MaxGainIdx] and
	// GetMaxPartValue(), we split the dataset based on the type of the
	// max-gain attribute.
	// If it is continuous, split the attribute using the numeric value.
	// If it is discrete, split the attribute using a subset (partition)
	// of nominal values.
	var splitV interface{}

	if MaxGain.IsContinu {
		splitV = MaxGain.GetMaxPartGainValue()
	} else {
		attrPartV := MaxGain.GetMaxPartGainValue()
		attrSubV := attrPartV.(tekstus.ListStrings)
		splitV = attrSubV[0].Normalize()
	}
	//......... (the rest of the code is omitted here) .........
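To illustrate the two split modes described above, here is a simplified, self-contained sketch: a numeric threshold split for continuous attributes and a subset-membership split for discrete ones. The function names and types are stand-ins, not tabula's row structures.

package main

import "fmt"

// splitByNumeric partitions row indices into left (value < threshold) and
// right, mirroring the continuous-attribute split.
func splitByNumeric(values []float64, threshold float64) (left, right []int) {
	for i, v := range values {
		if v < threshold {
			left = append(left, i)
		} else {
			right = append(right, i)
		}
	}
	return left, right
}

// splitBySubset partitions row indices by membership in a nominal-value
// subset, mirroring the discrete-attribute split.
func splitBySubset(values []string, subset map[string]bool) (left, right []int) {
	for i, v := range values {
		if subset[v] {
			left = append(left, i)
		} else {
			right = append(right, i)
		}
	}
	return left, right
}

func main() {
	l, r := splitByNumeric([]float64{1.5, 3.2, 0.7}, 2.0)
	fmt.Println(l, r) // [0 2] [1]

	l, r = splitBySubset([]string{"red", "blue", "red"},
		map[string]bool{"red": true})
	fmt.Println(l, r) // [0 2] [1]
}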
Example 5: ClassifySetByWeight
//
// ClassifySetByWeight will classify each instance in samples by weighting
// each stage's vote according to that stage's individual performance.
//
// Algorithm,
// (1) For each instance in samples,
//     (1.1) for each stage,
//           (1.1.1) collect votes for the instance in the current stage.
//           (1.1.2) Compute the probability of each class in the votes.
//
//                   prob_class = count_of_class / total_votes
//
//           (1.1.3) Compute the total probability times the stage weight.
//
//                   stage_prob = prob_class * stage_weight
//
//     (1.2) Divide each class's stage probability by the total weight,
//
//           stage_prob = stage_prob /
//                        (sum_of_all_weights * number_of_tree_in_forest)
//
//     (1.3) Select the class label with the highest probability.
//     (1.4) Save the stage probabilities for the positive class.
// (2) Compute the confusion matrix.
//
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	vs := samples.GetClassValueSpace()
	stageProbs := make([]float64, len(vs))
	stageSumProbs := make([]float64, len(vs))
	sumWeights := numerus.Floats64Sum(crf.weights)

	// (1)
	rows := samples.GetDataAsRows()
	for _, row := range *rows {
		for y := range stageSumProbs {
			stageSumProbs[y] = 0
		}

		// (1.1)
		for y, forest := range crf.forests {
			// (1.1.1)
			votes := forest.Votes(row, -1)

			// (1.1.2)
			probs := tekstus.WordsProbabilitiesOf(votes, vs, false)

			// (1.1.3)
			for z := range probs {
				stageSumProbs[z] += probs[z]
				stageProbs[z] += probs[z] * crf.weights[y]
			}
		}

		// (1.2)
		stageWeight := sumWeights * float64(crf.NTree)
		for x := range stageProbs {
			stageProbs[x] = stageProbs[x] / stageWeight
		}

		// (1.3)
		_, maxi, ok := numerus.Floats64FindMax(stageProbs)
		if ok {
			predicts = append(predicts, vs[maxi])
		}

		// (1.4)
		probs = append(probs, stageSumProbs[0]/
			float64(len(crf.forests)))
	}

	// (2)
	actuals := samples.GetClassAsStrings()
	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)

	crf.ComputeStatFromCM(&stat, cm)
	stat.End()

	_ = stat.Write(crf.StatFile)

	return predicts, cm, probs
}
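To make the weighting concrete, here is a standalone sketch of steps (1.1.2) through (1.2) for a single instance. The votes and weights are hard-coded stand-ins, not crf internals, and the normalization is simplified to a weighted average over stages rather than the sum_of_all_weights * number_of_tree_in_forest divisor used above.

package main

import "fmt"

func main() {
	// Hypothetical votes from three stages (forests) for one instance,
	// over the class value space [yes no].
	vs := []string{"yes", "no"}
	stageVotes := [][]string{
		{"yes", "yes", "no"},
		{"no", "no", "yes"},
		{"yes", "yes", "yes"},
	}
	weights := []float64{0.5, 0.2, 0.3}

	sumWeights := 0.0
	for _, w := range weights {
		sumWeights += w
	}

	// (1.1.2)-(1.1.3): per-class probability in each stage, scaled by
	// that stage's weight.
	stageProbs := make([]float64, len(vs))
	for y, votes := range stageVotes {
		counts := make(map[string]int)
		for _, v := range votes {
			counts[v]++
		}
		for z, class := range vs {
			p := float64(counts[class]) / float64(len(votes))
			stageProbs[z] += p * weights[y]
		}
	}

	// (1.2), simplified: divide by the weight sum so the result is a
	// weighted average of the per-stage class probabilities.
	for z := range stageProbs {
		stageProbs[z] /= sumWeights
	}

	fmt.Println(stageProbs) // [0.7 0.3]
}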