本文整理汇总了Golang中github.com/shuLhan/tabula.ClasetInterface接口的典型用法代码示例。如果您正苦于以下问题：Golang ClasetInterface接口的具体用法？Golang ClasetInterface怎么用？Golang ClasetInterface使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了ClasetInterface接口的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Golang代码示例。
示例1: deleteTrueNegative
// deleteTrueNegative removes from `samples` every row whose index is listed
// as true-negative in the confusion matrix `cm`.
//
// While the runtime holds at most one stage weight, the true-negative rows
// are pushed to the runtime's `tnset` before they are removed.
func (crf *Runtime) deleteTrueNegative(samples tabula.ClasetInterface,
	cm *classifier.CM,
) {
	ids := cm.TNIndices()
	sort.Ints(ids)

	// (1) Save the true-negative rows while no more than one weight has
	// been recorded.
	if len(crf.weights) <= 1 {
		for _, id := range ids {
			crf.tnset.PushRow(samples.GetRow(id))
		}
	}

	// (2) Delete in ascending index order. The position is corrected by
	// the number of rows already visited, presumably because every
	// deletion shifts the following rows down by one.
	ndeleted := 0
	for shift, id := range ids {
		if samples.DeleteRow(id-shift) != nil {
			ndeleted++
		}
	}

	if DEBUG >= 1 {
		fmt.Println(tag, "# TN", len(ids), "# deleted", ndeleted)
	}
}
示例2: Build
// Build trains the cascade on `samples`, creating and finalizing one forest
// for each of the NStage stages.
//
// It returns ErrNoInput when `samples` is nil; otherwise it returns the
// first error reported by Initialize, createForest or finalizeStage, or the
// result of Finalize when every stage succeeded.
func (crf *Runtime) Build(samples tabula.ClasetInterface) (e error) {
	if samples == nil {
		return ErrNoInput
	}

	if e = crf.Initialize(samples); e != nil {
		return e
	}

	fmt.Println(tag, "Training samples:", samples)
	fmt.Println(tag, "Sample (one row):", samples.GetRow(0))
	fmt.Println(tag, "Config:", crf)

	for stage := 0; stage < crf.NStage; stage++ {
		if DEBUG >= 1 {
			fmt.Println(tag, "Stage #", stage)
		}

		forest, err := crf.createForest(samples)
		if err != nil {
			return err
		}

		if err = crf.finalizeStage(forest); err != nil {
			return err
		}
	}

	return crf.Finalize()
}
示例3: Initialize
// Initialize checks the forest options and replaces every unset or invalid
// one with its default value:
//
//   - OOBStatsFile, PerfFile and StatFile when empty,
//   - NTree and PercentBoot when not positive,
//   - NRandomFeature, when not positive, is set to the integer square root
//     of the number of columns in `samples` minus one.
//
// It also computes the number of rows used for each tree as
//
//	number-of-rows * PercentBoot / 100
//
// and then returns the result of forest.Runtime.Initialize.
func (forest *Runtime) Initialize(samples tabula.ClasetInterface) error {
	// Output files.
	if forest.OOBStatsFile == "" {
		forest.OOBStatsFile = DefOOBStatsFile
	}
	if forest.PerfFile == "" {
		forest.PerfFile = DefPerfFile
	}
	if forest.StatFile == "" {
		forest.StatFile = DefStatFile
	}

	// Forest size and bootstrap percentage.
	if forest.NTree <= 0 {
		forest.NTree = DefNumTree
	}
	if forest.PercentBoot <= 0 {
		forest.PercentBoot = DefPercentBoot
	}

	// Number of random features: square root of the feature count, where
	// one column is subtracted from the total.
	if forest.NRandomFeature <= 0 {
		nfeature := float64(samples.GetNColumn() - 1)
		forest.NRandomFeature = int(math.Sqrt(nfeature))
	}

	// Size of the subsample, computed in float32.
	nrow := float32(samples.GetNRow())
	ratio := float32(forest.PercentBoot) / 100.0
	forest.nSubsample = int(nrow * ratio)

	return forest.Runtime.Initialize()
}
示例4: Initialize
// Initialize checks the cascade options and replaces every unset or invalid
// one with its default value:
//
//   - PerfFile and StatFile when empty,
//   - NStage, NTree and PercentBoot when not positive,
//   - TPRate and TNRate when they are <= 0 or >= 1,
//   - NRandomFeature, when not positive, is set to the integer square root
//     of the number of columns in `samples` minus one.
//
// A clone of `samples` is stored as the runtime's `tnset`; the type
// assertion panics if the clone is not a *tabula.Claset.
//
// It returns the result of crf.Runtime.Initialize.
func (crf *Runtime) Initialize(samples tabula.ClasetInterface) error {
	// Output files.
	if crf.PerfFile == "" {
		crf.PerfFile = DefPerfFile
	}
	if crf.StatFile == "" {
		crf.StatFile = DefStatFile
	}

	// Cascade and forest sizes.
	if crf.NStage <= 0 {
		crf.NStage = DefStage
	}
	if crf.NTree <= 0 {
		crf.NTree = DefNumTree
	}
	if crf.PercentBoot <= 0 {
		crf.PercentBoot = DefPercentBoot
	}

	// Threshold rates are only accepted strictly between 0 and 1.
	if crf.TPRate <= 0 || crf.TPRate >= 1 {
		crf.TPRate = DefTPRate
	}
	if crf.TNRate <= 0 || crf.TNRate >= 1 {
		crf.TNRate = DefTNRate
	}

	// Number of random features: square root of the feature count, where
	// one column is subtracted from the total.
	if crf.NRandomFeature <= 0 {
		nfeature := float64(samples.GetNColumn() - 1)
		crf.NRandomFeature = int(math.Sqrt(nfeature))
	}

	clone := samples.Clone()
	crf.tnset = clone.(*tabula.Claset)

	return crf.Runtime.Initialize()
}
示例5: computePerfByProbs
// computePerfByProbs appends performance statistics to rt.perfs, computed
// from the scores `probs` and the matching labels `actuals`.
//
// A new Stat is recorded every time the score changes while walking
// `probs`, plus a final one after the walk. A label equal to the first value
// of the class value space counts as true-positive, any other label as
// false-positive. Only the final Stat receives the AUC divided by the
// product of the two class counts found in `actuals`.
//
// Only the first two classes are used, so this works for two-class problems
// only.
func (rt *Runtime) computePerfByProbs(samples tabula.ClasetInterface,
	actuals []string, probs []float64,
) {
	vs := samples.GetClassValueSpace()
	nactuals := numerus.IntsTo64(samples.Counts())
	nclass := tekstus.WordsCountTokens(actuals, vs, false)

	var (
		tp, fp         int64
		tpprev, fpprev int64
		auc            float64
	)
	pprev := math.Inf(-1)

	// newStat creates a Stat with the rates and precision derived from the
	// current counters.
	newStat := func() *Stat {
		stat := &Stat{}
		stat.SetTPRate(tp, nactuals[0])
		stat.SetFPRate(fp, nactuals[1])
		stat.SetPrecisionFromRate(nactuals[0], nactuals[1])
		return stat
	}

	for x, p := range probs {
		if p != pprev {
			stat := newStat()
			auc += trapezoidArea(fp, fpprev, tp, tpprev)
			stat.SetAUC(auc)
			rt.perfs = append(rt.perfs, stat)

			pprev, tpprev, fpprev = p, tp, fp
		}

		if actuals[x] == vs[0] {
			tp++
		} else {
			fp++
		}
	}

	last := newStat()
	auc += trapezoidArea(fp, fpprev, tp, tpprev)
	auc /= float64(nclass[0] * nclass[1])
	last.SetAUC(auc)
	rt.perfs = append(rt.perfs, last)

	// The first stat is overwritten by the second one, because the first
	// precision is NaN.
	if len(rt.perfs) >= 2 {
		rt.perfs[0] = rt.perfs[1]
	}
}
示例6: Init
// Init prepares the LNSmote runtime for `dataset`.
//
// It first runs in.Runtime.Init, then sets NSynthetic to PercentOver / 100,
// stores the dataset rows, selects the rows whose column ClassIndex matches
// ClassMinor as the minority set, and starts with an empty outliers list.
func (in *Runtime) Init(dataset tabula.ClasetInterface) {
	in.Runtime.Init()

	in.NSynthetic = in.PercentOver / 100.0
	in.datasetRows = dataset.GetDataAsRows()
	in.minorset = tabula.SelectRowsWhere(dataset, in.ClassIndex,
		in.ClassMinor)
	in.outliers = make(tabula.Rows, 0)

	if DEBUG < 1 {
		return
	}

	fmt.Println("[lnsmote] n:", in.NSynthetic)
	fmt.Println("[lnsmote] n minority:", in.minorset.Len())
}
示例7: ClassifySet
// ClassifySet predicts the class of every row in `samples` by collecting the
// votes of the forest, and returns the predictions, the confusion matrix and
// the probability of the first class for every row.
//
// `sampleIds` holds the index of each sample. When it is not empty, the
// index of the row is passed along to Votes; otherwise -1 is passed.
//
// Algorithm,
//
//	(0) Get the class value space and the actual classes.
//	(1) For each row,
//	(1.1) collect the votes,
//	(1.2) select the class with the highest probability, and
//	(1.3) save the probability of the first class.
//	(2) Compute the confusion matrix from the predictions.
//	(3) Compute the stat from the confusion matrix.
//	(4) Print the result and write the stat to StatFile, only when
//	    `sampleIds` is empty.
func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	fromOOB := len(sampleIds) > 0

	if !fromOOB {
		fmt.Println(tag, "Classify set:", samples)
		fmt.Println(tag, "Classify set sample (one row):",
			samples.GetRow(0))
	}

	// (0)
	vs := samples.GetClassValueSpace()
	actuals := samples.GetClassAsStrings()

	// (1)
	for x, row := range *samples.GetRows() {
		// (1.1)
		sampleIdx := -1
		if fromOOB {
			sampleIdx = sampleIds[x]
		}
		votes := forest.Votes(row, sampleIdx)

		// (1.2)
		classProbs := tekstus.WordsProbabilitiesOf(votes, vs, false)
		if _, best, found := numerus.Floats64FindMax(classProbs); found {
			predicts = append(predicts, vs[best])
		}

		// (1.3)
		probs = append(probs, classProbs[0])
	}

	// (2)
	cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)

	// (3)
	forest.ComputeStatFromCM(&stat, cm)
	stat.End()

	// (4)
	if !fromOOB {
		fmt.Println(tag, "CM:", cm)
		fmt.Println(tag, "Classifying stat:", stat)
		_ = stat.Write(forest.StatFile)
	}

	return predicts, cm, probs
}
示例8: SelectRandomFeature
// SelectRandomFeature flags all but NRandomFeature randomly picked columns
// of `D` with ColFlagSkip.
//
// Nothing is changed when NRandomFeature is not positive, or when it is
// equal to or greater than the number of columns minus one (the class).
// The class column and columns flagged with ColFlagParent are never picked.
func (runtime *Runtime) SelectRandomFeature(D tabula.ClasetInterface) {
	nrandom := runtime.NRandomFeature
	if nrandom <= 0 {
		// All features are used.
		return
	}

	ncols := D.GetNColumn()
	if nrandom >= ncols-1 {
		// Asked for at least as many features as the dataset has,
		// not counting the class column.
		return
	}

	// The class column and the parent columns are excluded from the
	// selection; every other column is skipped until it is picked.
	excludeIdx := []int{D.GetClassIndex()}
	cols := D.GetColumns()
	for x := range *cols {
		if ((*cols)[x].Flag & ColFlagParent) == ColFlagParent {
			excludeIdx = append(excludeIdx, x)
			continue
		}
		(*cols)[x].Flag |= ColFlagSkip
	}

	// Pick the random columns and clear their skip flag.
	var pickedIdx []int
	for len(pickedIdx) < nrandom {
		idx := numerus.IntPickRandPositive(ncols, false, pickedIdx,
			excludeIdx)
		pickedIdx = append(pickedIdx, idx)

		picked := D.GetColumn(idx)
		picked.Flag &^= ColFlagSkip
	}

	if DEBUG >= 1 {
		fmt.Println("[cart] selected random features:", pickedIdx)
		fmt.Println("[cart] selected columns :", D.GetColumns())
	}
}
示例9: Performance
// Performance computes the performance statistics of a classifier from the
// actual classes in `samples` and the probabilities `probs`, and returns
// rt.perfs.
//
// Algorithm,
//
//	(1) Sort the probabilities in place, keeping track of the original
//	    indices (presumably in descending order, TODO confirm the meaning
//	    of the last Floats64InplaceMergesort argument).
//	(2) Reorder the actuals and predicts using the sorted indices.
//	(3) Compute the statistics with computePerfByProbs.
//
// Note that `probs` is modified by the sort.
func (rt *Runtime) Performance(samples tabula.ClasetInterface,
	predicts []string, probs []float64,
) (
	perfs Stats,
) {
	// (1)
	actuals := samples.GetClassAsStrings()
	nprobs := len(probs)
	sortedIds := numerus.IntCreateSeq(0, nprobs-1)
	numerus.Floats64InplaceMergesort(probs, sortedIds, 0, nprobs, false)

	// (2)
	tekstus.StringsSortByIndex(&actuals, sortedIds)
	tekstus.StringsSortByIndex(&predicts, sortedIds)

	// (3)
	rt.computePerfByProbs(samples, actuals, probs)

	perfs = rt.perfs
	return perfs
}
示例10: refillWithFP
// refillWithFP moves the rows that the confusion matrix `cm` lists as
// false-positive from `tnset` to `samples`: each row is first pushed to
// `samples`, then deleted from `tnset`.
func (crf *Runtime) refillWithFP(samples, tnset tabula.ClasetInterface,
	cm *classifier.CM,
) {
	ids := cm.FPIndices()
	sort.Ints(ids)

	// Append the false-positive rows to the training samples.
	for _, id := range ids {
		samples.PushRow(tnset.GetRow(id))
	}

	// Remove them from tnset in ascending index order. The position is
	// corrected by the number of rows already visited, presumably because
	// every deletion shifts the following rows down by one.
	nrefilled := 0
	for shift, id := range ids {
		if tnset.DeleteRow(id-shift) != nil {
			nrefilled++
		}
	}

	if DEBUG >= 1 {
		fmt.Println(tag, "# FP", len(ids), "# refilled", nrefilled)
	}
}
示例11: Build
// Build grows the forest from the `samples` dataset.
//
// Algorithm,
//
//	(0) Check the input and initialize the options with Initialize.
//	(1) For 0 to NTree,
//	(1.1) grow a tree; on error, print it and try again until a tree is
//	      grown.
//	(2) Return the result of Finalize.
//
// It returns ErrNoInput when `samples` is nil.
//
// NOTE(review): step (1.1) retries without a limit, so an error that
// GrowTree reports on every call would keep this loop running forever.
func (forest *Runtime) Build(samples tabula.ClasetInterface) (e error) {
	if samples == nil {
		return ErrNoInput
	}

	// (0)
	if e = forest.Initialize(samples); e != nil {
		return e
	}

	fmt.Println(tag, "Training set :", samples)
	fmt.Println(tag, "Sample (one row):", samples.GetRow(0))
	fmt.Println(tag, "Forest config :", forest)

	// (1)
	for ntree := 0; ntree < forest.NTree; ntree++ {
		if DEBUG >= 1 {
			fmt.Println(tag, "tree #", ntree)
		}

		// (1.1)
		_, _, e = forest.GrowTree(samples)
		for e != nil {
			fmt.Println(tag, "error:", e)
			_, _, e = forest.GrowTree(samples)
		}
	}

	// (2)
	return forest.Finalize()
}
示例12: ClassifySet
// ClassifySet classifies every row in `data` and stores the result, as a
// string value, in the record of the class column at the same position.
//
// The value returned by SetValue is discarded, so the returned error is
// always nil.
func (runtime *Runtime) ClassifySet(data tabula.ClasetInterface) (e error) {
	nrow := data.GetNRow()
	targetAttr := data.GetClassColumn()

	for i := 0; i < nrow; i++ {
		row := data.GetRow(i)
		class := runtime.Classify(row)

		_ = (*targetAttr).Records[i].SetValue(class, tabula.TString)
	}

	return nil
}
示例13: computeGain
// computeGain computes the Gini gain of every column in `D`, indexed by
// column position.
//
// The gains are only allocated when SplitMethod is SplitMethodGini. The
// class column is left untouched, and columns flagged with ColFlagParent or
// ColFlagSkip (after SelectRandomFeature has run) are marked as skipped
// instead of being computed.
//
// Columns of type TReal are computed as continuous attributes, against the
// class as strings when the class type is TString and as reals otherwise.
// All other columns are computed as discrete attributes.
func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
	gains []gini.Gini,
) {
	if runtime.SplitMethod == SplitMethodGini {
		// One gain for each column, including the class column.
		gains = make([]gini.Gini, D.GetNColumn())
	}

	runtime.SelectRandomFeature(D)

	classVS := D.GetClassValueSpace()
	classIdx := D.GetClassIndex()
	classType := D.GetClassType()

	for x, col := range *D.GetColumns() {
		// The class attribute has no gain.
		if x == classIdx {
			continue
		}

		// Parent and skipped columns are not computed.
		isParent := (col.Flag & ColFlagParent) == ColFlagParent
		if isParent || (col.Flag&ColFlagSkip) == ColFlagSkip {
			gains[x].Skip = true
			continue
		}

		isReal := col.GetType() == tabula.TReal

		switch {
		case isReal && classType == tabula.TString:
			attr := col.ToFloatSlice()
			target := D.GetClassAsStrings()

			gains[x].ComputeContinu(&attr, &target, &classVS)

		case isReal:
			attr := col.ToFloatSlice()
			targetReal := D.GetClassAsReals()
			classVSReal := tekstus.StringsToFloat64(classVS)

			gains[x].ComputeContinuFloat(&attr, &targetReal,
				&classVSReal)

		default:
			attr := col.ToStringSlice()
			attrV := col.ValueSpace

			if DEBUG >= 2 {
				fmt.Println("[cart] attr :", attr)
				fmt.Println("[cart] attrV:", attrV)
			}

			target := D.GetClassAsStrings()

			gains[x].ComputeDiscrete(&attr, &attrV, &target,
				&classVS)
		}

		if DEBUG >= 2 {
			fmt.Println("[cart] gain :", gains[x])
		}
	}

	return gains
}
示例14: splitTreeByGain
/*
splitTreeByGain calculate the gain in all dataset, and split into two node:
left and right.
Return node with the split information.
*/
func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
node *binary.BTNode,
e error,
) {
node = &binary.BTNode{}
D.RecountMajorMinor()
// if dataset is empty return node labeled with majority classes in
// dataset.
nrow := D.GetNRow()
if nrow <= 0 {
if DEBUG >= 2 {
fmt.Printf("[cart] empty dataset (%s) : %v\n",
D.MajorityClass(), D)
}
node.Value = NodeValue{
IsLeaf: true,
Class: D.MajorityClass(),
Size: 0,
}
return node, nil
}
// if all dataset is in the same class, return node as leaf with class
// is set to that class.
single, name := D.IsInSingleClass()
if single {
if DEBUG >= 2 {
fmt.Printf("[cart] in single class (%s): %v\n", name,
D.GetColumns())
}
node.Value = NodeValue{
IsLeaf: true,
Class: name,
Size: nrow,
}
return node, nil
}
if DEBUG >= 2 {
fmt.Println("[cart] D:", D)
}
// calculate the Gini gain for each attribute.
gains := runtime.computeGain(D)
// get attribute with maximum Gini gain.
MaxGainIdx := gini.FindMaxGain(&gains)
MaxGain := gains[MaxGainIdx]
// if maxgain value is 0, use majority class as node and terminate
// the process
if MaxGain.GetMaxGainValue() == 0 {
if DEBUG >= 2 {
fmt.Println("[cart] max gain 0 with target",
D.GetClassAsStrings(),
" and majority class is ", D.MajorityClass())
}
node.Value = NodeValue{
IsLeaf: true,
Class: D.MajorityClass(),
Size: 0,
}
return node, nil
}
// using the sorted index in MaxGain, sort all field in dataset
tabula.SortColumnsByIndex(D, MaxGain.SortedIndex)
if DEBUG >= 2 {
fmt.Println("[cart] maxgain:", MaxGain)
}
// Now that we have attribute with max gain in MaxGainIdx, and their
// gain dan partition value in Gains[MaxGainIdx] and
// GetMaxPartValue(), we split the dataset based on type of max-gain
// attribute.
// If its continuous, split the attribute using numeric value.
// If its discrete, split the attribute using subset (partition) of
// nominal values.
var splitV interface{}
if MaxGain.IsContinu {
splitV = MaxGain.GetMaxPartGainValue()
} else {
attrPartV := MaxGain.GetMaxPartGainValue()
attrSubV := attrPartV.(tekstus.ListStrings)
splitV = attrSubV[0].Normalize()
}
//.........这里部分代码省略.........
示例15: ClassifySetByWeight
// ClassifySetByWeight classifies each instance in `samples` by weighting the
// votes of every stage with the weight of that stage.
//
// It returns the predicted class of each instance, the confusion matrix,
// and, for each instance, the probability of the first class in the value
// space averaged over all stages. `sampleIds` is only passed on to
// ComputeCM. The classification stat is written to StatFile; a failure to
// write it is ignored.
//
// Algorithm,
//
//	(1) For each instance in samples,
//	(1.0) reset the per-instance accumulators,
//	(1.1) for each stage,
//	(1.1.1) collect votes for instance in current stage.
//	(1.1.2) Compute probabilities of each classes in votes.
//
//		prob_class = count_of_class / total_votes
//
//	(1.1.3) Compute total of probabilities times of stage weight.
//
//		stage_prob = prob_class * stage_weight
//
//	(1.2) Divide each class stage probabilities with
//
//		stage_prob = stage_prob /
//			(sum_of_all_weights * number_of_tree_in_forest)
//
//	(1.3) Select class label with highest probabilities.
//	(1.4) Save stage probabilities for positive class.
//	(2) Compute confusion matrix.
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	vs := samples.GetClassValueSpace()
	stageProbs := make([]float64, len(vs))
	stageSumProbs := make([]float64, len(vs))
	sumWeights := numerus.Floats64Sum(crf.weights)

	// Both divisors are the same for every instance.
	stageWeight := sumWeights * float64(crf.NTree)
	nforest := float64(len(crf.forests))

	// (1)
	rows := samples.GetDataAsRows()
	for _, row := range *rows {
		// (1.0) Both accumulators belong to the current instance only.
		// Without resetting stageProbs, the weighted probabilities of
		// the previous instance would leak into this one.
		for y := range stageProbs {
			stageProbs[y] = 0
			stageSumProbs[y] = 0
		}

		// (1.1)
		for y, forest := range crf.forests {
			// (1.1.1)
			votes := forest.Votes(row, -1)

			// (1.1.2)
			classProbs := tekstus.WordsProbabilitiesOf(votes, vs,
				false)

			// (1.1.3)
			for z := range classProbs {
				stageSumProbs[z] += classProbs[z]
				stageProbs[z] += classProbs[z] * crf.weights[y]
			}
		}

		// (1.2)
		for x := range stageProbs {
			stageProbs[x] = stageProbs[x] / stageWeight
		}

		// (1.3)
		_, maxi, ok := numerus.Floats64FindMax(stageProbs)
		if ok {
			predicts = append(predicts, vs[maxi])
		}

		// (1.4)
		probs = append(probs, stageSumProbs[0]/nforest)
	}

	// (2)
	actuals := samples.GetClassAsStrings()
	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)

	crf.ComputeStatFromCM(&stat, cm)
	stat.End()

	// Writing the stat is best effort.
	_ = stat.Write(crf.StatFile)

	return predicts, cm, probs
}