This page collects typical usage examples of the Golang method github.com/shuLhan/tabula.ClasetInterface.GetClassAsStrings. If you have been wondering what ClasetInterface.GetClassAsStrings does, how to use it, and what real usage looks like, the curated code examples below may help. You can also explore further usage of the parent type, github.com/shuLhan/tabula.ClasetInterface.
Below are 5 code examples of the ClasetInterface.GetClassAsStrings method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Golang code examples.
Example 1: ClassifySet
//
// ClassifySet, given samples, predicts their class by running each sample
// through the forest, and returns the class predictions along with a
// confusion matrix.
// `samples` are the samples to be predicted; `sampleIds` holds the indices
// of the samples.
// If `sampleIds` is not nil, the sample index is checked in each tree; if
// the sample was used for training, its vote is not counted.
//
// Algorithm,
//
// (0) Get the value space (possible class values in the dataset).
// (1) For each row in the test set,
//     (1.1) collect votes in all trees,
//     (1.2) select the majority class vote, and
//     (1.3) compute and save the class probabilities.
// (2) Compute the confusion matrix from the predictions.
// (3) Compute stats from the confusion matrix.
// (4) Write the stats to file only if sampleIds is empty, which means the
//     run is not from the OOB set.
//
func (forest *Runtime) ClassifySet(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	if len(sampleIds) <= 0 {
		fmt.Println(tag, "Classify set:", samples)
		fmt.Println(tag, "Classify set sample (one row):",
			samples.GetRow(0))
	}

	// (0)
	vs := samples.GetClassValueSpace()
	actuals := samples.GetClassAsStrings()
	sampleIdx := -1

	// (1)
	rows := samples.GetRows()
	for x, row := range *rows {
		// (1.1)
		if len(sampleIds) > 0 {
			sampleIdx = sampleIds[x]
		}

		votes := forest.Votes(row, sampleIdx)

		// (1.2)
		classProbs := tekstus.WordsProbabilitiesOf(votes, vs, false)

		_, idx, ok := numerus.Floats64FindMax(classProbs)
		if ok {
			predicts = append(predicts, vs[idx])
		}

		// (1.3)
		probs = append(probs, classProbs[0])
	}

	// (2)
	cm = forest.ComputeCM(sampleIds, vs, actuals, predicts)

	// (3)
	forest.ComputeStatFromCM(&stat, cm)
	stat.End()

	if len(sampleIds) <= 0 {
		fmt.Println(tag, "CM:", cm)
		fmt.Println(tag, "Classifying stat:", stat)
		_ = stat.Write(forest.StatFile)
	}

	return predicts, cm, probs
}
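For intuition, the majority-vote step (1.2) can be sketched without the tekstus and numerus helpers. The following is a minimal, self-contained approximation; majorityVote and its inputs are illustrative names, not part of the tabula API.

package main

import "fmt"

// majorityVote returns the class with the largest share of votes, along
// with the per-class probabilities, mirroring step (1.2) of ClassifySet.
func majorityVote(votes, valueSpace []string) (string, []float64) {
	counts := make(map[string]int, len(valueSpace))
	for _, v := range votes {
		counts[v]++
	}

	probs := make([]float64, len(valueSpace))
	best, bestIdx := -1.0, 0
	for i, class := range valueSpace {
		probs[i] = float64(counts[class]) / float64(len(votes))
		if probs[i] > best {
			best, bestIdx = probs[i], i
		}
	}
	return valueSpace[bestIdx], probs
}

func main() {
	votes := []string{"yes", "no", "yes", "yes", "no"}
	class, probs := majorityVote(votes, []string{"yes", "no"})
	fmt.Println(class, probs) // yes [0.6 0.4]
}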
Example 2: Performance
//
// Performance, given the actual class labels and their probabilities,
// computes the performance statistics of the classifier.
//
// Algorithm,
// (1) Sort the probabilities in descending order.
// (2) Sort the actuals and predicts using the sorted index from probs.
// (3) Compute tpr, fpr, and precision.
// (4) Write the performance to file.
//
func (rt *Runtime) Performance(samples tabula.ClasetInterface,
	predicts []string, probs []float64,
) (
	perfs Stats,
) {
	// (1)
	actuals := samples.GetClassAsStrings()
	sortedIds := numerus.IntCreateSeq(0, len(probs)-1)
	numerus.Floats64InplaceMergesort(probs, sortedIds, 0, len(probs),
		false)

	// (2)
	tekstus.StringsSortByIndex(&actuals, sortedIds)
	tekstus.StringsSortByIndex(&predicts, sortedIds)

	// (3)
	rt.computePerfByProbs(samples, actuals, probs)

	return rt.perfs
}
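The descending sort-by-probability pairing in steps (1) and (2) can also be expressed with the standard library alone. This is an illustrative sketch under that assumption, not the numerus/tekstus implementation:

package main

import (
	"fmt"
	"sort"
)

func main() {
	probs := []float64{0.2, 0.9, 0.5}
	actuals := []string{"no", "yes", "yes"}
	predicts := []string{"no", "yes", "no"}

	// Build an index permutation sorted by descending probability,
	// then apply it to every parallel slice.
	idx := make([]int, len(probs))
	for i := range idx {
		idx[i] = i
	}
	sort.Slice(idx, func(i, j int) bool {
		return probs[idx[i]] > probs[idx[j]]
	})

	sortedActuals := make([]string, len(idx))
	sortedPredicts := make([]string, len(idx))
	for to, from := range idx {
		sortedActuals[to] = actuals[from]
		sortedPredicts[to] = predicts[from]
	}
	fmt.Println(sortedActuals, sortedPredicts) // [yes yes no] [yes no no]
}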
Example 3: computeGain
/*
computeGain calculates the Gini index for each value in each attribute.
*/
func (runtime *Runtime) computeGain(D tabula.ClasetInterface) (
	gains []gini.Gini,
) {
	switch runtime.SplitMethod {
	case SplitMethodGini:
		// Create gain values for all attributes minus the target
		// class.
		gains = make([]gini.Gini, D.GetNColumn())
	}

	runtime.SelectRandomFeature(D)

	classVS := D.GetClassValueSpace()
	classIdx := D.GetClassIndex()
	classType := D.GetClassType()

	for x, col := range *D.GetColumns() {
		// Skip the class attribute.
		if x == classIdx {
			continue
		}

		// Skip columns flagged with parent.
		if (col.Flag & ColFlagParent) == ColFlagParent {
			gains[x].Skip = true
			continue
		}

		// Ignore columns flagged with skip.
		if (col.Flag & ColFlagSkip) == ColFlagSkip {
			gains[x].Skip = true
			continue
		}

		// Compute the gain.
		if col.GetType() == tabula.TReal {
			attr := col.ToFloatSlice()

			if classType == tabula.TString {
				target := D.GetClassAsStrings()
				gains[x].ComputeContinu(&attr, &target,
					&classVS)
			} else {
				targetReal := D.GetClassAsReals()
				classVSReal := tekstus.StringsToFloat64(
					classVS)

				gains[x].ComputeContinuFloat(&attr,
					&targetReal, &classVSReal)
			}
		} else {
			attr := col.ToStringSlice()
			attrV := col.ValueSpace

			if DEBUG >= 2 {
				fmt.Println("[cart] attr :", attr)
				fmt.Println("[cart] attrV:", attrV)
			}

			target := D.GetClassAsStrings()

			gains[x].ComputeDiscrete(&attr, &attrV, &target,
				&classVS)
		}

		if DEBUG >= 2 {
			fmt.Println("[cart] gain :", gains[x])
		}
	}
	return
}
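For reference, the Gini index behind these gain computations is one minus the sum of squared class probabilities. The sketch below computes it for a string-labeled target; giniIndex is an illustrative helper, not the gini package API.

package main

import "fmt"

// giniIndex computes 1 - sum(p_c^2) over the class probabilities of target.
func giniIndex(target []string) float64 {
	counts := make(map[string]int)
	for _, t := range target {
		counts[t]++
	}

	n := float64(len(target))
	sum := 0.0
	for _, c := range counts {
		p := float64(c) / n
		sum += p * p
	}
	return 1 - sum
}

func main() {
	fmt.Println(giniIndex([]string{"yes", "yes", "no", "no"})) // 0.5
	fmt.Println(giniIndex([]string{"yes", "yes", "yes"}))      // 0 (pure node)
}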
Example 4: splitTreeByGain
/*
splitTreeByGain calculates the gain over the whole dataset and splits it
into two nodes: left and right.
It returns the node with the split information.
*/
func (runtime *Runtime) splitTreeByGain(D tabula.ClasetInterface) (
	node *binary.BTNode,
	e error,
) {
	node = &binary.BTNode{}

	D.RecountMajorMinor()

	// If the dataset is empty, return a leaf node labeled with the
	// majority class of the dataset.
	nrow := D.GetNRow()
	if nrow <= 0 {
		if DEBUG >= 2 {
			fmt.Printf("[cart] empty dataset (%s) : %v\n",
				D.MajorityClass(), D)
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  D.MajorityClass(),
			Size:   0,
		}
		return node, nil
	}

	// If the whole dataset is in a single class, return a leaf node with
	// its class set to that class.
	single, name := D.IsInSingleClass()
	if single {
		if DEBUG >= 2 {
			fmt.Printf("[cart] in single class (%s): %v\n", name,
				D.GetColumns())
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  name,
			Size:   nrow,
		}
		return node, nil
	}

	if DEBUG >= 2 {
		fmt.Println("[cart] D:", D)
	}

	// Calculate the Gini gain for each attribute.
	gains := runtime.computeGain(D)

	// Get the attribute with the maximum Gini gain.
	MaxGainIdx := gini.FindMaxGain(&gains)
	MaxGain := gains[MaxGainIdx]

	// If the max gain value is 0, use the majority class as the node and
	// terminate the process.
	if MaxGain.GetMaxGainValue() == 0 {
		if DEBUG >= 2 {
			fmt.Println("[cart] max gain 0 with target",
				D.GetClassAsStrings(),
				" and majority class is ", D.MajorityClass())
		}

		node.Value = NodeValue{
			IsLeaf: true,
			Class:  D.MajorityClass(),
			Size:   0,
		}
		return node, nil
	}

	// Using the sorted index in MaxGain, sort all fields in the dataset.
	tabula.SortColumnsByIndex(D, MaxGain.SortedIndex)

	if DEBUG >= 2 {
		fmt.Println("[cart] maxgain:", MaxGain)
	}

	// Now that we have the attribute with the max gain in MaxGainIdx, and
	// its gain and partition value in Gains[MaxGainIdx] and
	// GetMaxPartValue(), we split the dataset based on the type of the
	// max-gain attribute.
	// If it is continuous, split the attribute using the numeric value.
	// If it is discrete, split the attribute using a subset (partition)
	// of nominal values.
	var splitV interface{}

	if MaxGain.IsContinu {
		splitV = MaxGain.GetMaxPartGainValue()
	} else {
		attrPartV := MaxGain.GetMaxPartGainValue()
		attrSubV := attrPartV.(tekstus.ListStrings)
		splitV = attrSubV[0].Normalize()
	}
	//......... (the rest of the code is omitted here) .........
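To illustrate the two split modes described above, here is a simplified, self-contained sketch: a numeric threshold split for continuous attributes and a subset-membership split for discrete ones. The function names and types are stand-ins, not tabula's row structures.

package main

import "fmt"

// splitByNumeric partitions row indices into left (value < threshold) and
// right, mirroring the continuous-attribute split.
func splitByNumeric(values []float64, threshold float64) (left, right []int) {
	for i, v := range values {
		if v < threshold {
			left = append(left, i)
		} else {
			right = append(right, i)
		}
	}
	return left, right
}

// splitBySubset partitions row indices by membership in a nominal-value
// subset, mirroring the discrete-attribute split.
func splitBySubset(values []string, subset map[string]bool) (left, right []int) {
	for i, v := range values {
		if subset[v] {
			left = append(left, i)
		} else {
			right = append(right, i)
		}
	}
	return left, right
}

func main() {
	l, r := splitByNumeric([]float64{1.5, 3.2, 0.7}, 2.0)
	fmt.Println(l, r) // [0 2] [1]

	l, r = splitBySubset([]string{"red", "blue", "red"},
		map[string]bool{"red": true})
	fmt.Println(l, r) // [0 2] [1]
}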
Example 5: ClassifySetByWeight
//
// ClassifySetByWeight will classify each instance in samples by weighting
// each stage's vote according to that stage's individual performance.
//
// Algorithm,
// (1) For each instance in samples,
//     (1.1) for each stage,
//           (1.1.1) collect votes for the instance in the current stage.
//           (1.1.2) Compute the probability of each class in the votes.
//
//                   prob_class = count_of_class / total_votes
//
//           (1.1.3) Compute the total probability times the stage weight.
//
//                   stage_prob = prob_class * stage_weight
//
//     (1.2) Divide each class's stage probability by the total weight,
//
//           stage_prob = stage_prob /
//                        (sum_of_all_weights * number_of_tree_in_forest)
//
//     (1.3) Select the class label with the highest probability.
//     (1.4) Save the stage probabilities for the positive class.
// (2) Compute the confusion matrix.
//
func (crf *Runtime) ClassifySetByWeight(samples tabula.ClasetInterface,
	sampleIds []int,
) (
	predicts []string, cm *classifier.CM, probs []float64,
) {
	stat := classifier.Stat{}
	stat.Start()

	vs := samples.GetClassValueSpace()
	stageProbs := make([]float64, len(vs))
	stageSumProbs := make([]float64, len(vs))
	sumWeights := numerus.Floats64Sum(crf.weights)

	// (1)
	rows := samples.GetDataAsRows()
	for _, row := range *rows {
		for y := range stageSumProbs {
			stageSumProbs[y] = 0
		}

		// (1.1)
		for y, forest := range crf.forests {
			// (1.1.1)
			votes := forest.Votes(row, -1)

			// (1.1.2)
			probs := tekstus.WordsProbabilitiesOf(votes, vs, false)

			// (1.1.3)
			for z := range probs {
				stageSumProbs[z] += probs[z]
				stageProbs[z] += probs[z] * crf.weights[y]
			}
		}

		// (1.2)
		stageWeight := sumWeights * float64(crf.NTree)
		for x := range stageProbs {
			stageProbs[x] = stageProbs[x] / stageWeight
		}

		// (1.3)
		_, maxi, ok := numerus.Floats64FindMax(stageProbs)
		if ok {
			predicts = append(predicts, vs[maxi])
		}

		// (1.4)
		probs = append(probs, stageSumProbs[0]/
			float64(len(crf.forests)))
	}

	// (2)
	actuals := samples.GetClassAsStrings()
	cm = crf.ComputeCM(sampleIds, vs, actuals, predicts)

	crf.ComputeStatFromCM(&stat, cm)
	stat.End()

	_ = stat.Write(crf.StatFile)

	return predicts, cm, probs
}
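To make the weighting concrete, here is a standalone sketch of steps (1.1.2) through (1.2) for a single instance. The votes and weights are hard-coded stand-ins, not crf internals, and the normalization is simplified to a weighted average over stages rather than the sum_of_all_weights * number_of_tree_in_forest divisor used above.

package main

import "fmt"

func main() {
	// Hypothetical votes from three stages (forests) for one instance,
	// over the class value space [yes no].
	vs := []string{"yes", "no"}
	stageVotes := [][]string{
		{"yes", "yes", "no"},
		{"no", "no", "yes"},
		{"yes", "yes", "yes"},
	}
	weights := []float64{0.5, 0.2, 0.3}

	sumWeights := 0.0
	for _, w := range weights {
		sumWeights += w
	}

	// (1.1.2)-(1.1.3): per-class probability in each stage, scaled by
	// that stage's weight.
	stageProbs := make([]float64, len(vs))
	for y, votes := range stageVotes {
		counts := make(map[string]int)
		for _, v := range votes {
			counts[v]++
		}
		for z, class := range vs {
			p := float64(counts[class]) / float64(len(votes))
			stageProbs[z] += p * weights[y]
		}
	}

	// (1.2), simplified: divide by the weight sum so the result is a
	// weighted average of the per-stage class probabilities.
	for z := range stageProbs {
		stageProbs[z] /= sumWeights
	}

	fmt.Println(stageProbs) // [0.7 0.3]
}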