This page collects typical usage examples of the Golang method github.com/sjwhitworth/golearn/base.FixedDataGrid.Size. If you are unsure what FixedDataGrid.Size does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage of the containing interface, github.com/sjwhitworth/golearn/base.FixedDataGrid.
15 code examples of the FixedDataGrid.Size method are shown below, sorted by popularity by default.
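Every example follows the same calling convention: Size() returns the grid's width (the number of Attributes, class Attributes included) first and its height (the number of rows) second, which is why most call sites read `_, rows := grid.Size()`. A minimal, self-contained sketch of the pattern (the CSV path is illustrative):

package main

import (
	"fmt"

	"github.com/sjwhitworth/golearn/base"
)

func main() {
	// ParseCSVToInstances returns a *base.DenseInstances, which
	// implements base.FixedDataGrid.
	grid, err := base.ParseCSVToInstances("datasets/iris.csv", true)
	if err != nil {
		panic(err)
	}
	// Size returns (number of Attributes, number of rows).
	attrs, rows := grid.Size()
	fmt.Printf("%d attributes, %d rows\n", attrs, rows)
}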
Example 1: findBestSplit
func findBestSplit(partition base.FixedDataGrid) {
	// Start from the lowest possible value (math.MinInt64 converts
	// exactly to a float64, so this compiles and behaves as -2^63).
	delta := float64(math.MinInt64)
attrs := partition.AllAttributes()
classAttrs := partition.AllClassAttributes()
candidates := base.AttributeDifferenceReferences(attrs, classAttrs)
fmt.Println(delta)
fmt.Println(classAttrs)
fmt.Println(reflect.TypeOf(partition))
fmt.Println(reflect.TypeOf(candidates))
for i, n := range attrs {
fmt.Println(i)
//fmt.Println(partition)
fmt.Println(reflect.TypeOf(n))
attributeSpec, _ := partition.GetAttribute(n)
fmt.Println(partition.GetAttribute(n))
_, rows := partition.Size()
for j := 0; j < rows; j++ {
data := partition.Get(attributeSpec, j)
fmt.Println(base.UnpackBytesToFloat(data))
}
}
}
Example 2: getNumericAttributeEntropy
func getNumericAttributeEntropy(f base.FixedDataGrid, attr *base.FloatAttribute) (float64, float64) {
// Resolve Attribute
attrSpec, err := f.GetAttribute(attr)
if err != nil {
panic(err)
}
// Build sortable vector
_, rows := f.Size()
refs := make([]numericSplitRef, rows)
f.MapOverRows([]base.AttributeSpec{attrSpec}, func(val [][]byte, row int) (bool, error) {
cls := base.GetClass(f, row)
v := base.UnpackBytesToFloat(val[0])
refs[row] = numericSplitRef{v, cls}
return true, nil
})
// Sort
sort.Sort(splitVec(refs))
	generateCandidateSplitDistribution := func(val float64) map[string]map[string]int {
		preSplit := make(map[string]int)
		postSplit := make(map[string]int)
		for _, i := range refs {
			if i.val < val {
				preSplit[i.class]++
			} else {
				postSplit[i.class]++
			}
		}
		ret := make(map[string]map[string]int)
		ret["0"] = preSplit
		ret["1"] = postSplit
		return ret
	}
minSplitEntropy := math.Inf(1)
minSplitVal := math.Inf(1)
	// Consider each possible split point (the midpoint of each adjacent pair of sorted values)
for i := 0; i < len(refs)-1; i++ {
val := refs[i].val + refs[i+1].val
val /= 2
splitDist := generateCandidateSplitDistribution(val)
splitEntropy := getSplitEntropy(splitDist)
if splitEntropy < minSplitEntropy {
minSplitEntropy = splitEntropy
minSplitVal = val
}
}
return minSplitEntropy, minSplitVal
}
Example 3: NewChiMergeFilter
// NewChiMergeFilter creates a ChiMergeFilter with some helpful initialisations.
func NewChiMergeFilter(d base.FixedDataGrid, significance float64) *ChiMergeFilter {
_, rows := d.Size()
return &ChiMergeFilter{
AbstractDiscretizeFilter{
make(map[base.Attribute]bool),
false,
d,
},
make(map[base.Attribute][]*FrequencyTableEntry),
significance,
2,
rows,
}
}
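A sketch of how the filter is typically wired into golearn's discretisation workflow (AddAttribute, Train and base.NewLazilyFilteredInstances are assumed from the surrounding filters/base packages; the significance level is illustrative):

filt := NewChiMergeFilter(inst, 0.90)
// Discretise every numeric non-class Attribute.
for _, a := range base.NonClassFloatAttributes(inst) {
	filt.AddAttribute(a)
}
filt.Train()
// Wrap the original grid so values are discretised lazily on read.
instDiscretised := base.NewLazilyFilteredInstances(inst, filt)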
Example 4: GenerateCrossFoldValidationConfusionMatrices
// GenerateCrossFoldValidationConfusionMatrices divides the data into the given
// number of folds, then trains and evaluates the classifier with each fold held
// out in turn, producing one ConfusionMatrix per fold.
func GenerateCrossFoldValidationConfusionMatrices(data base.FixedDataGrid, cls base.Classifier, folds int) ([]ConfusionMatrix, error) {
_, rows := data.Size()
// Assign each row to a fold
foldMap := make([]int, rows)
inverseFoldMap := make(map[int][]int)
for i := 0; i < rows; i++ {
fold := rand.Intn(folds)
foldMap[i] = fold
if _, ok := inverseFoldMap[fold]; !ok {
inverseFoldMap[fold] = make([]int, 0)
}
inverseFoldMap[fold] = append(inverseFoldMap[fold], i)
}
ret := make([]ConfusionMatrix, folds)
// Create training/test views for each fold
for i := 0; i < folds; i++ {
// Fold i is for testing
testData := base.NewInstancesViewFromVisible(data, inverseFoldMap[i], data.AllAttributes())
otherRows := make([]int, 0)
for j := 0; j < folds; j++ {
if i == j {
continue
}
otherRows = append(otherRows, inverseFoldMap[j]...)
}
trainData := base.NewInstancesViewFromVisible(data, otherRows, data.AllAttributes())
// Train
err := cls.Fit(trainData)
if err != nil {
return nil, err
}
// Predict
pred, err := cls.Predict(testData)
if err != nil {
return nil, err
}
// Evaluate
cf, err := GetConfusionMatrix(testData, pred)
if err != nil {
return nil, err
}
ret[i] = cf
}
return ret, nil
}
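A minimal calling sketch, written as if in the same package as the function above (qualify the call if used from outside); cls stands for any base.Classifier, and the dataset path and fold count are illustrative:

data, err := base.ParseCSVToInstances("datasets/iris.csv", true)
if err != nil {
	panic(err)
}
confusions, err := GenerateCrossFoldValidationConfusionMatrices(data, cls, 5)
if err != nil {
	panic(err)
}
// One ConfusionMatrix per fold; aggregate or inspect them individually.
for i, cm := range confusions {
	fmt.Printf("fold %d: %v\n", i, cm)
}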
Example 5: processData
func processData(x base.FixedDataGrid) instances {
_, rows := x.Size()
result := make(instances, rows)
// Retrieve numeric non-class Attributes
numericAttrs := base.NonClassFloatAttributes(x)
numericAttrSpecs := base.ResolveAttributes(x, numericAttrs)
// Retrieve class Attributes
classAttrs := x.AllClassAttributes()
if len(classAttrs) != 1 {
panic("Only one classAttribute supported!")
}
// Check that the class Attribute is categorical
// (with two values) or binary
classAttr := classAttrs[0]
if attr, ok := classAttr.(*base.CategoricalAttribute); ok {
if len(attr.GetValues()) != 2 {
panic("To many values for Attribute!")
}
} else if _, ok := classAttr.(*base.BinaryAttribute); ok {
} else {
panic("Wrong class Attribute type!")
}
// Convert each row
x.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
// Allocate a new row
probRow := make([]float64, len(numericAttrSpecs))
// Read out the row
		for i := range numericAttrSpecs {
probRow[i] = base.UnpackBytesToFloat(row[i])
}
// Get the class for the values
class := base.GetClass(x, rowNo)
instance := instance{class, probRow}
result[rowNo] = instance
return true, nil
})
return result
}
Example 6: computePairwiseDistances
func computePairwiseDistances(inst base.FixedDataGrid, attrs []base.Attribute, metric pairwise.PairwiseDistanceFunc) (*mat64.Dense, error) {
// Compute pair-wise distances
// First convert everything to floats
mats, err := base.ConvertAllRowsToMat64(attrs, inst)
if err != nil {
return nil, err
}
// Next, do an n^2 computation of all pairwise distances
_, rows := inst.Size()
dist := mat64.NewDense(rows, rows, nil)
for i := 0; i < rows; i++ {
for j := i + 1; j < rows; j++ {
d := metric.Distance(mats[i], mats[j])
dist.Set(i, j, d)
dist.Set(j, i, d)
}
}
return dist, nil
}
Example 7: convertInstancesToLabelVec
func convertInstancesToLabelVec(X base.FixedDataGrid) []float64 {
// Get the class Attributes
classAttrs := X.AllClassAttributes()
// Only support 1 class Attribute
if len(classAttrs) != 1 {
panic(fmt.Sprintf("%d ClassAttributes (1 expected)", len(classAttrs)))
}
// ClassAttribute must be numeric
if _, ok := classAttrs[0].(*base.FloatAttribute); !ok {
panic(fmt.Sprintf("%s: ClassAttribute must be a FloatAttribute", classAttrs[0]))
}
// Allocate return structure
_, rows := X.Size()
labelVec := make([]float64, rows)
// Resolve class Attribute specification
classAttrSpecs := base.ResolveAttributes(X, classAttrs)
X.MapOverRows(classAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
labelVec[rowNo] = base.UnpackBytesToFloat(row[0])
return true, nil
})
return labelVec
}
Example 8: GetConfusionMatrix
// GetConfusionMatrix builds a ConfusionMatrix from a set of reference (`ref')
// and generated (`gen') Instances.
func GetConfusionMatrix(ref base.FixedDataGrid, gen base.FixedDataGrid) (map[string]map[string]int, error) {
_, refRows := ref.Size()
_, genRows := gen.Size()
if refRows != genRows {
		return nil, fmt.Errorf("Row count mismatch: ref has %d rows, gen has %d rows", refRows, genRows)
}
ret := make(map[string]map[string]int)
for i := 0; i < int(refRows); i++ {
referenceClass := base.GetClass(ref, i)
predictedClass := base.GetClass(gen, i)
if _, ok := ret[referenceClass]; ok {
ret[referenceClass][predictedClass] += 1
} else {
ret[referenceClass] = make(map[string]int)
ret[referenceClass][predictedClass] = 1
}
}
return ret, nil
}
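The returned map is indexed by true class first, then by predicted class. A hypothetical helper (not part of this page's source) that reduces it to overall accuracy:

func accuracy(cm map[string]map[string]int) float64 {
	correct, total := 0, 0
	for ref, row := range cm {
		for pred, n := range row {
			total += n
			if ref == pred {
				correct += n
			}
		}
	}
	if total == 0 {
		return 0
	}
	return float64(correct) / float64(total)
}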
Example 9: convertInstancesToProblemVec
func convertInstancesToProblemVec(X base.FixedDataGrid) [][]float64 {
// Allocate problem array
_, rows := X.Size()
problemVec := make([][]float64, rows)
// Retrieve numeric non-class Attributes
numericAttrs := base.NonClassFloatAttributes(X)
numericAttrSpecs := base.ResolveAttributes(X, numericAttrs)
// Convert each row
X.MapOverRows(numericAttrSpecs, func(row [][]byte, rowNo int) (bool, error) {
// Allocate a new row
probRow := make([]float64, len(numericAttrSpecs))
// Read out the row
		for i := range numericAttrSpecs {
probRow[i] = base.UnpackBytesToFloat(row[i])
}
// Add the row
problemVec[rowNo] = probRow
return true, nil
})
return problemVec
}
Example 10: GetConfusionMatrix
// GetConfusionMatrix builds a ConfusionMatrix from a set of reference (`ref')
// and generated (`gen') Instances. This variant panics on a row-count mismatch
// instead of returning an error.
func GetConfusionMatrix(ref base.FixedDataGrid, gen base.FixedDataGrid) map[string]map[string]int {
_, refRows := ref.Size()
_, genRows := gen.Size()
if refRows != genRows {
panic("Row counts should match")
}
ret := make(map[string]map[string]int)
for i := 0; i < int(refRows); i++ {
referenceClass := base.GetClass(ref, i)
predictedClass := base.GetClass(gen, i)
if _, ok := ret[referenceClass]; ok {
ret[referenceClass][predictedClass] += 1
} else {
ret[referenceClass] = make(map[string]int)
ret[referenceClass][predictedClass] = 1
}
}
return ret
}
Example 11: InferID3Tree
// InferID3Tree builds a decision tree using a RuleGenerator
// from a set of Instances (implements the ID3 algorithm)
func InferID3Tree(from base.FixedDataGrid, with RuleGenerator) *DecisionTreeNode {
// Count the number of classes at this node
classes := base.GetClassDistribution(from)
// If there's only one class, return a DecisionTreeLeaf with
// the only class available
if len(classes) == 1 {
maxClass := ""
for i := range classes {
maxClass = i
}
ret := &DecisionTreeNode{
LeafNode,
nil,
classes,
maxClass,
getClassAttr(from),
&DecisionTreeRule{nil, 0.0},
}
return ret
}
	// Compute the majority class, in case we can't split any further
maxVal := 0
maxClass := ""
for i := range classes {
if classes[i] > maxVal {
maxClass = i
maxVal = classes[i]
}
}
// If there are no more Attributes left to split on,
// return a DecisionTreeLeaf with the majority class
cols, _ := from.Size()
if cols == 2 {
ret := &DecisionTreeNode{
LeafNode,
nil,
classes,
maxClass,
getClassAttr(from),
&DecisionTreeRule{nil, 0.0},
}
return ret
}
// Generate a return structure
ret := &DecisionTreeNode{
RuleNode,
nil,
classes,
maxClass,
getClassAttr(from),
nil,
}
// Generate the splitting rule
splitRule := with.GenerateSplitRule(from)
if splitRule == nil {
// Can't determine, just return what we have
return ret
}
// Split the attributes based on this attribute's value
var splitInstances map[string]base.FixedDataGrid
if _, ok := splitRule.SplitAttr.(*base.FloatAttribute); ok {
splitInstances = base.DecomposeOnNumericAttributeThreshold(from,
splitRule.SplitAttr, splitRule.SplitVal)
} else {
splitInstances = base.DecomposeOnAttributeValues(from, splitRule.SplitAttr)
}
// Create new children from these attributes
ret.Children = make(map[string]*DecisionTreeNode)
for k := range splitInstances {
newInstances := splitInstances[k]
ret.Children[k] = InferID3Tree(newInstances, with)
}
ret.SplitRule = splitRule
return ret
}
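A sketch of invoking the function (InformationGainRuleGenerator is golearn's usual RuleGenerator implementation, but treat the exact name as an assumption of this sketch):

rules := new(InformationGainRuleGenerator)
root := InferID3Tree(trainData, rules) // trainData: any base.FixedDataGrid
fmt.Println(root)                      // inspect the induced tree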
Example 12: Fit
// Fit builds the Bernoulli Naive Bayes model from the training matrix,
// computing all values necessary for the class prior probabilities and the
// conditional probabilities p(f_i|c).
func (nb *BernoulliNBClassifier) Fit(X base.FixedDataGrid) {
// Check that all Attributes are binary
classAttrs := X.AllClassAttributes()
allAttrs := X.AllAttributes()
featAttrs := base.AttributeDifference(allAttrs, classAttrs)
for i := range featAttrs {
if _, ok := featAttrs[i].(*base.BinaryAttribute); !ok {
panic(fmt.Sprintf("%v: Should be BinaryAttribute", featAttrs[i]))
}
}
featAttrSpecs := base.ResolveAttributes(X, featAttrs)
// Check that only one classAttribute is defined
if len(classAttrs) != 1 {
panic("Only one class Attribute can be used")
}
// Number of features and instances in this training set
_, nb.trainingInstances = X.Size()
nb.attrs = featAttrs
nb.features = len(featAttrs)
// Number of instances in class
nb.classInstances = make(map[string]int)
// Number of documents with given term (by class)
docsContainingTerm := make(map[string][]int)
	// This algorithm could be vectorized after binarizing the data
	// matrix. Since mat64 doesn't have this function, an iterative
	// version is used.
X.MapOverRows(featAttrSpecs, func(docVector [][]byte, r int) (bool, error) {
class := base.GetClass(X, r)
// increment number of instances in class
t, ok := nb.classInstances[class]
if !ok {
t = 0
}
nb.classInstances[class] = t + 1
for feat := 0; feat < len(docVector); feat++ {
v := docVector[feat]
// In Bernoulli Naive Bayes the presence and absence of
// features are considered. All non-zero values are
// treated as presence.
if v[0] > 0 {
// Update number of times this feature appeared within
// given label.
t, ok := docsContainingTerm[class]
if !ok {
t = make([]int, nb.features)
docsContainingTerm[class] = t
}
t[feat] += 1
}
}
return true, nil
})
// Pre-calculate conditional probabilities for each class
	for c := range nb.classInstances {
nb.condProb[c] = make([]float64, nb.features)
for feat := 0; feat < nb.features; feat++ {
			classTerms := docsContainingTerm[c]
			numDocs := classTerms[feat]
			docsInClass := nb.classInstances[c]
			classCondProb := nb.condProb[c]
// Calculate conditional probability with laplace smoothing
classCondProb[feat] = float64(numDocs+1) / float64(docsInClass+1)
}
}
}
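With Laplace smoothing, the value stored above is p(f_i|c) = (docs in class c containing feature i + 1) / (docs in class c + 1). A hedged usage sketch (assuming golearn's NewBernoulliNBClassifier constructor; X and Xtest carry binary feature Attributes plus one class Attribute):

nb := NewBernoulliNBClassifier()
nb.Fit(X)
predictions := nb.Predict(Xtest)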
Example 13: generateTrainingInstances
// generateTrainingInstances samples rows with replacement from `from` and
// restricts the result to the RandomFeatures Attributes selected for this
// model, returning a view suitable for training.
func (b *BaggedModel) generateTrainingInstances(model int, from base.FixedDataGrid) base.FixedDataGrid {
_, rows := from.Size()
insts := base.SampleWithReplacement(from, rows)
selected := b.generateTrainingAttrs(model, from)
return base.NewInstancesViewFromAttrs(insts, selected)
}
Example 14: Predict
// Predict returns a classification for each row of `what`, using the KNN algorithm.
func (KNN *KNNClassifier) Predict(what base.FixedDataGrid) base.FixedDataGrid {
// Check what distance function we are using
var distanceFunc pairwise.PairwiseDistanceFunc
switch KNN.DistanceFunc {
case "euclidean":
distanceFunc = pairwise.NewEuclidean()
case "manhattan":
distanceFunc = pairwise.NewManhattan()
default:
panic("unsupported distance function")
}
// Check Compatibility
allAttrs := base.CheckCompatible(what, KNN.TrainingData)
if allAttrs == nil {
// Don't have the same Attributes
return nil
}
// Use optimised version if permitted
if KNN.AllowOptimisations {
if KNN.DistanceFunc == "euclidean" {
if KNN.canUseOptimisations(what) {
return KNN.optimisedEuclideanPredict(what.(*base.DenseInstances))
}
}
}
fmt.Println("Optimisations are switched off")
// Remove the Attributes which aren't numeric
allNumericAttrs := make([]base.Attribute, 0)
for _, a := range allAttrs {
if fAttr, ok := a.(*base.FloatAttribute); ok {
allNumericAttrs = append(allNumericAttrs, fAttr)
}
}
// Generate return vector
ret := base.GeneratePredictionVector(what)
// Resolve Attribute specifications for both
whatAttrSpecs := base.ResolveAttributes(what, allNumericAttrs)
trainAttrSpecs := base.ResolveAttributes(KNN.TrainingData, allNumericAttrs)
	// Reserve storage for the most similar items
distances := make(map[int]float64)
// Reserve storage for voting map
maxmap := make(map[string]int)
// Reserve storage for row computations
trainRowBuf := make([]float64, len(allNumericAttrs))
predRowBuf := make([]float64, len(allNumericAttrs))
_, maxRow := what.Size()
curRow := 0
// Iterate over all outer rows
what.MapOverRows(whatAttrSpecs, func(predRow [][]byte, predRowNo int) (bool, error) {
		// Report progress periodically. The 1000-row interval is an
		// illustrative fix: the original modulus of 1 is always 0, so it
		// printed on every single row.
		if curRow%1000 == 0 && curRow > 0 {
			fmt.Printf("KNN: %.2f %% done\n", float64(curRow)*100.0/float64(maxRow))
		}
curRow++
// Read the float values out
		for i := range allNumericAttrs {
predRowBuf[i] = base.UnpackBytesToFloat(predRow[i])
}
predMat := utilities.FloatsToMatrix(predRowBuf)
// Find the closest match in the training data
KNN.TrainingData.MapOverRows(trainAttrSpecs, func(trainRow [][]byte, srcRowNo int) (bool, error) {
// Read the float values out
			for i := range allNumericAttrs {
trainRowBuf[i] = base.UnpackBytesToFloat(trainRow[i])
}
// Compute the distance
trainMat := utilities.FloatsToMatrix(trainRowBuf)
distances[srcRowNo] = distanceFunc.Distance(predMat, trainMat)
return true, nil
})
sorted := utilities.SortIntMap(distances)
values := sorted[:KNN.NearestNeighbours]
maxClass := KNN.vote(maxmap, values)
base.SetClass(ret, predRowNo, maxClass)
return true, nil
})
return ret
}
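End-to-end, the classifier is used roughly like this (the two-argument constructor matches older golearn releases; newer ones add a kernel argument, so treat the exact signature as an assumption):

cls := NewKnnClassifier("euclidean", 2)
cls.Fit(trainData) // trainData, testData: base.FixedDataGrid values
predictions := cls.Predict(testData)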
Example 15: DBSCAN
// DBSCAN clusters inst using the parameters given in params and produces a ClusterId->[]RowId map
func DBSCAN(inst base.FixedDataGrid, params DBSCANParameters) (ClusterMap, error) {
// Compute the distances between each possible point
dist, err := computePairwiseDistances(inst, params.Attributes, params.Metric)
if err != nil {
return nil, err
}
_, rows := inst.Size()
clusterMap := make(map[int][]int)
visited := big.NewInt(0)
clustered := big.NewInt(0)
// expandCluster adds P to a cluster C, visiting any neighbours
expandCluster := func(p int, neighbours *big.Int, c int) {
if clustered.Bit(p) == 1 {
panic("Shouldn't happen!")
}
// Add this point to cluster C
if _, ok := clusterMap[c]; !ok {
clusterMap[c] = make([]int, 0)
}
clusterMap[c] = append(clusterMap[c], p)
clustered.SetBit(clustered, p, 1)
visited.SetBit(visited, p, 1)
for i := 0; i < rows; i++ {
reset := false
if neighbours.Bit(i) == 0 {
// Not a neighbour, so skip
continue
}
if visited.Bit(i) == 0 {
// not yet visited
visited = visited.SetBit(visited, i, 1) // Mark as visited
newNeighbours := big.NewInt(0)
newNeighbours = regionQuery(i, newNeighbours, dist, params.Eps)
if BitCount(newNeighbours) >= params.MinCount {
neighbours = neighbours.Or(neighbours, newNeighbours)
reset = true
}
} else {
continue
}
if clustered.Bit(i) == 0 {
clusterMap[c] = append(clusterMap[c], i)
clustered = clustered.SetBit(clustered, i, 1)
}
		if reset {
			// The neighbour set grew, so rescan it from the start.
			i = 0
		}
}
}
c := 0
for i := 0; i < rows; i++ {
if visited.Bit(i) == 1 {
continue // Already visited here
}
visited.SetBit(visited, i, 1)
neighbours := big.NewInt(0)
neighbours = regionQuery(i, neighbours, dist, params.Eps)
if BitCount(neighbours) < params.MinCount {
// Noise, cluster 0
clustered = clustered.Or(clustered, neighbours)
continue
}
c = c + 1 // Increment cluster count
expandCluster(i, neighbours, c)
}
// Remove anything from the map which doesn't make
// minimum points
rmKeys := make([]int, 0)
for id := range clusterMap {
if len(clusterMap[id]) < params.MinCount {
rmKeys = append(rmKeys, id)
}
}
for _, r := range rmKeys {
delete(clusterMap, r)
}
return ClusterMap(clusterMap), nil
}
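A minimal calling sketch (the field names follow the params usage above; the Eps and MinCount values are illustrative, and base.NonClassAttributes is assumed from golearn's base package):

params := DBSCANParameters{
	Attributes: base.NonClassAttributes(inst),
	Metric:     pairwise.NewEuclidean(),
	Eps:        0.5,
	MinCount:   3,
}
clusters, err := DBSCAN(inst, params)
if err != nil {
	panic(err)
}
fmt.Println(clusters) // ClusterMap: cluster id -> member row ids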