本文整理汇总了Python中sandbox.util.Util.Util.histogram方法的典型用法代码示例。如果您正苦于以下问题：Python Util.histogram方法的具体用法？Python Util.histogram怎么用？Python Util.histogram使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sandbox.util.Util.Util的用法示例。
在下文中一共展示了Util.histogram方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: splitNode
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import histogram [as 别名]
def splitNode(self, tree, X, Y, d, k):
    """
    Train a classifier on the node stored at vertex (d, k) of the tree and,
    when its predictions separate the node's examples into two groups, attach
    a left child at (d+1, 2k) and a right child at (d+1, 2k+1). Otherwise the
    node is marked as a leaf and given a score.

    :param tree: the tree holding the node; it is modified in place.
    :param X: the example matrix, indexed here as X[rows, :][:, columns].
    :param Y: the labels, indexed by example.
    :param d: first component of the vertex id of the node to split.
    :param k: second component of the vertex id of the node to split.
    :return: the tree that was passed in.
    """
    numFeatures = X.shape[1]

    # Identity test rather than "== None": equality can be overloaded by the
    # stored value, whereas "is None" only ever checks for the missing default.
    if self.featureSize is None:
        featureSize = numpy.sqrt(numFeatures)/float(numFeatures)
    else:
        featureSize = self.featureSize

    node = tree.getVertex((d, k))
    inds = node.getTrainInds()
    featureInds = node.getFeatureInds()
    # Fraction of this node's examples whose label equals bestResponse
    alpha = numpy.sum(Y[inds]==self.bestResponse)/float(inds.shape[0])

    # Now classify.
    # Use the configured learner only if the rarest label in the node occurs
    # more than minLabelCount times; otherwise use a MajorityPredictor.
    # NOTE(review): presumably generateLearner does cross validation within
    # the node and needs enough examples per label -- confirm.
    if Util.histogram(Y[inds])[0].min() > self.minLabelCount:
        self.leafRanklearner.setWeight(1-alpha)
        leafRank = self.leafRanklearner.generateLearner(X, Y)
    else:
        leafRank = MajorityPredictor()

    node.setLeafRank(leafRank)
    nodeX = X[inds, :][:, featureInds]
    leafRank.learnModel(nodeX, Y[inds])
    predY = leafRank.predict(nodeX)

    if numpy.unique(predY).shape[0] == 2 and inds.shape[0] >= self.minSplit:
        numChildFeatures = int(numpy.round(numFeatures*featureSize))

        # Left child (index 2k) is built before the right child (index 2k+1)
        # so that the random feature subsets are drawn in a fixed order.
        children = (
            (2*k, inds[predY == self.bestResponse]),
            (2*k+1, inds[predY != self.bestResponse]),
        )

        for childK, childInds in children:
            # Each child gets its own sorted random subset of feature columns
            childFeatureInds = numpy.sort(numpy.random.permutation(numFeatures)[0:numChildFeatures])
            childNode = RankNode(childInds, childFeatureInds)
            childNode.setPure(numpy.unique(Y[childInds]).shape[0] <= 1)
            childNode.setIsLeafNode(d==self.maxDepth-1 or childNode.isPure())
            childNode.setScore((1 - float(childK)/2**(d+1))*2**self.maxDepth)
            tree.addEdge((d, k), (d+1, childK))
            tree.setVertex((d+1, childK), childNode)
    else:
        # The classifier could not separate the examples (or there are fewer
        # than minSplit of them), so this node becomes a leaf.
        node.setIsLeafNode(True)
        node.setScore((1 - float(k)/2**d)*2**self.maxDepth)

    return tree
示例2: testHistogram
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import histogram [as 别名]
def testHistogram(self):
    """
    Check Util.histogram on a small integer array: the first returned array
    must hold the count of each distinct value and the second the distinct
    values themselves, here [0, 1, 2, 5] with counts [3, 1, 3, 2].
    """
    v = numpy.array([0, 0, 1, 5, 0, 2, 2, 2, 5])
    freq, items = Util.histogram(v)

    # assert_array_equal fails on a length mismatch and reports the differing
    # entries, unlike assertTrue((a == b).all()) which relies on elementwise
    # "==" and gives no diagnostics.
    numpy.testing.assert_array_equal(freq, numpy.array([3, 1, 3, 2]))
    numpy.testing.assert_array_equal(items, numpy.array([0, 1, 2, 5]))
示例3: range
# 需要导入模块: from sandbox.util.Util import Util [as 别名]
# 或者: from sandbox.util.Util.Util import histogram [as 别名]
# Read the ego and alter answer matrices for their respective question ids.
# NOTE(review): "missing" is presumably the value substituted for missing
# answers by readFile -- confirm against eCsvReader.
egoQuestionIds = eCsvReader.getEgoQuestionIds()
alterQuestionIds = eCsvReader.getAlterQuestionIds()
missing = 0

egoX, titles = eCsvReader.readFile(egoFileName, egoQuestionIds, missing)
egoX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(egoX[:, eCsvReader.ageIndex])

alterX, titles = eCsvReader.readFile(alterFileName, alterQuestionIds, missing)
alterX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(alterX[:, eCsvReader.ageIndex])

numFeatures = egoX.shape[1]
numEgoExamples = egoX.shape[0]
numAlterExamples = alterX.shape[0]

# For every feature column, print the distinct values seen for egos and for
# alters, the values that occur in only one of the two groups, and the value
# counts divided by the number of examples in each group.
for i in range(numFeatures):
    histE, uniqElementsE = Util.histogram(egoX[:, i])
    histA, uniqElementsA = Util.histogram(alterX[:, i])

    print(str(i) + " " + str(egoQuestionIds[i]))
    print("Ego " + str(uniqElementsE))
    print("Alter " + str(uniqElementsA))
    print(numpy.setxor1d(uniqElementsE, uniqElementsA))
    print(histE/numEgoExamples)
    print(histA/numAlterExamples)

"""
Conclusion is that the distributions are broadly the same. The problem occurs
with missing data handling. For example in Ego there are values with [ 0. 8.]
with most zero, and in alter [ 0. 5.]. The means will be approx 8 for ego and 5 for
alter.
"""