本文整理汇总了Python中sklearn.utils.fixes.bincount函数的典型用法代码示例。如果您正苦于以下问题:Python bincount函数的具体用法?Python bincount怎么用?Python bincount使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了bincount函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: check_min_samples_leaf
def check_min_samples_leaf(name):
    """Check that forest leaves respect ``min_samples_leaf``.

    Covers three cases: invalid boundary values raise ``ValueError``,
    an integer ``min_samples_leaf`` bounds the per-leaf sample count,
    and a fractional value bounds it relative to ``len(X)``.
    """
    X, y = hastie_X, hastie_y
    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value: min_samples_leaf must be strictly positive
    assert_raises(ValueError, ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError, ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    # np.bincount used consistently here (the original mixed the removed
    # sklearn.utils.fixes.bincount with np.bincount between the two halves).
    node_counts = np.bincount(out)
    # drop inner nodes: apply() only returns leaf ids, so inner nodes are 0
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4, "Failed with {0}".format(name))

    # fractional min_samples_leaf: each leaf holds >= 25% of the samples
    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                          random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
                   "Failed with {0}".format(name))
示例2: check_min_weight_fraction_leaf
def check_min_weight_fraction_leaf(name, X, y):
    """Every leaf must carry at least ``min_weight_fraction_leaf`` of the
    total sample weight of the training set."""
    ForestEstimator = FOREST_ESTIMATORS[name]
    rng = np.random.RandomState(0)
    weights = rng.rand(X.shape[0])
    total_weight = np.sum(weights)

    # Exercise both the DepthFirstTreeBuilder and the BestFirstTreeBuilder
    # by toggling max_leaf_nodes.
    for max_leaf_nodes in (None, 1000):
        for frac in np.linspace(0, 0.5, 6):
            est = ForestEstimator(min_weight_fraction_leaf=frac,
                                  max_leaf_nodes=max_leaf_nodes,
                                  random_state=0)
            if isinstance(est, (RandomForestClassifier,
                                RandomForestRegressor)):
                est.bootstrap = False
            est.fit(X, y, sample_weight=weights)

            leaf_ids = est.estimators_[0].tree_.apply(X)
            per_node_weight = bincount(leaf_ids, weights=weights)
            # apply() maps samples to leaves only, so inner nodes end up
            # with zero weight — keep the populated (leaf) entries.
            leaf_weights = per_node_weight[per_node_weight != 0]
            assert_greater_equal(
                np.min(leaf_weights),
                total_weight * est.min_weight_fraction_leaf,
                "Failed with {0} "
                "min_weight_fraction_leaf={1}".format(
                    name, est.min_weight_fraction_leaf))
示例3: _make_test_folds
def _make_test_folds(self, X, y=None, groups=None):
    """Pre-assign each sample to a test-fold index, stratified by class.

    Parameters mirror scikit-learn's splitter API; ``groups`` is accepted
    for signature compatibility but unused.  Returns the per-sample fold
    assignment produced by ``iterative_stratification``.

    Fix: removed the misspelled and never-used local ``n_samlples``.
    """
    if self.shuffle:
        rng = check_random_state(self.random_state)
    else:
        rng = self.random_state
    y = np.asarray(y)
    # Labels may be comma-joined multi-label strings; flatten to a label list.
    y = ','.join(y).split(',')
    unique_y, y_inversed = np.unique(y, return_inverse=True)
    y_counts = bincount(y_inversed)
    min_groups = np.min(y_counts)
    if np.all(self.n_splits > y_counts):
        raise ValueError("All the n_groups for individual classes"
                         " are less than n_splits=%d."
                         % (self.n_splits))
    if self.n_splits > min_groups:
        warnings.warn(("The least populated class in y has only %d"
                       " members, which is too few. The minimum"
                       " number of groups for any class cannot"
                       " be less than n_splits=%d."
                       % (min_groups, self.n_splits)), Warning)
    # pre-assign each sample to a test fold index using individual KFold
    # splitting strategies for each class so as to respect the balance of
    # classes
    # NOTE: Passing the data corresponding to ith class say X[y==class_i]
    # will break when the data is not 100% stratifiable for all classes.
    # So we pass np.zeroes(max(c, n_splits)) as data to the KFold
    test_folds = iterative_stratification(X, set(y), self.n_splits, rng)
    return test_folds
示例4: _recompute_centers
def _recompute_centers( X, labels, n_clusters):
"""
Computation of cluster centers / means.
Parameters
----------
X: array-like, shape (n_samples, n_features)
labels: array of integers, shape (n_samples)
Current label assignment
n_clusters: int
Number of desired clusters
Returns
-------
centers: array, shape (n_clusters, n_features)
The resulting centers
"""
n_samples = X.shape[0]
n_features = X.shape[1]
# Initialize centers to all zero
centers = np.zeros((n_clusters, n_features))
n_samples_in_cluster = bincount(labels, minlength=n_clusters)
# Compute a center for each label
# For each label, average over samples and features
# TODO: IMPLEMENT
# Take all of the samples in a cluster and average their features
return centers
示例5: _generate_unsampled_indices
def _generate_unsampled_indices(random_state, n_samples):
    '''Samples out of bag'''
    # Draw the same bootstrap sample the estimator was trained on, then
    # keep exactly the indices that were never drawn (count == 0).
    drawn = _generate_sample_indices(random_state, n_samples)
    draw_counts = bincount(drawn, minlength=n_samples)
    return np.arange(n_samples)[draw_counts == 0]
示例6: entropy
def entropy(samples):
    """Return the Shannon entropy, in bits, of integer-labeled ``samples``.

    ``samples`` is a sequence of non-negative integer labels; the result is
    ``-sum(p * log2(p))`` over the empirical label frequencies.
    """
    n_samples = len(samples)
    entropy = 0.
    # np.bincount replaces the removed sklearn.utils.fixes.bincount.
    for count in np.bincount(samples):
        p = 1. * count / n_samples
        # Labels that never occur contribute 0 (and log2(0) is undefined).
        if p > 0:
            entropy -= p * np.log2(p)
    return entropy
示例7: test_sample_weight
def test_sample_weight():
    """Check sample weighting."""
    # Zero-weighted samples must be ignored entirely by the fit.
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0
    weights = np.ones(100)
    weights[y == 0] = 0.0
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X, y, sample_weight=weights)
    assert_array_equal(clf.predict(X), np.ones(100))

    # At max_depth=1 the single split follows whichever class is heaviest.
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200
    weights = np.ones(200)
    weights[y == 2] = .51  # class '2' still outweighs the others
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=weights)
    assert_equal(clf.tree_.threshold[0], 149.5)

    weights[y == 2] = .50  # class '2' no longer outweighs the others
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=weights)
    assert_equal(clf.tree_.threshold[0], 49.5)  # threshold should move

    # Weighting by duplicate counts is equivalent to fitting on the
    # duplicated rows themselves.
    X = iris.data
    y = iris.target
    duplicates = rng.randint(0, X.shape[0], 200)
    clf = DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])
    weights = bincount(duplicates, minlength=X.shape[0])
    clf2 = DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=weights)
    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_almost_equal(clf.tree_.threshold[internal],
                              clf2.tree_.threshold[internal])
示例8: _recompute_centers
def _recompute_centers( X, labels, n_clusters):
"""
Computation of cluster centers / means.
Parameters
----------
X: array-like, shape (n_samples, n_features)
labels: array of integers, shape (n_samples)
Current label assignment
n_clusters: int
Number of desired clusters
Returns
-------
centers: array, shape (n_clusters, n_features)
The resulting centers
"""
n_samples = X.shape[0]
n_features = X.shape[1]
# Initialize centers to all zero
centers = np.zeros((n_clusters, n_features))
n_samples_in_cluster = bincount(labels, minlength=n_clusters)
# Compute a center for each label
# For each label, average over samples and features
#TODO: IMPLEMENT
# Take all of the samples in a cluster and add their features
# For each sample
# What label is it? Let's say its label x
# Add feature i to label X's feature value i
for sample_idx in xrange(n_samples):
label = labels[sample_idx]
centers[label] += X[sample_idx]
#for j in xrange(n_features):
# centers[label[j]] +=X[sample_idx[j]]
# Normalize by the size of the cluster
centers /= n_samples_in_cluster[:, np.newaxis]
return centers
示例9: check_min_samples_leaf
def check_min_samples_leaf(name, X, y):
    """Every leaf of the first tree must contain more than
    ``min_samples_leaf - 1`` training examples."""
    ForestEstimator = FOREST_ESTIMATORS[name]
    # Exercise both the DepthFirstTreeBuilder and the BestFirstTreeBuilder
    # by toggling max_leaf_nodes.
    for max_leaf_nodes in (None, 1000):
        est = ForestEstimator(min_samples_leaf=5,
                              max_leaf_nodes=max_leaf_nodes,
                              random_state=0)
        est.fit(X, y)
        leaf_ids = est.estimators_[0].tree_.apply(X)
        per_node = bincount(leaf_ids)
        # apply() only yields leaf ids, so inner nodes count 0 — drop them.
        populated = per_node[per_node != 0]
        assert_greater(np.min(populated), 4,
                       "Failed with {0}".format(name))
示例10: _balanced_parallel_build_trees
def _balanced_parallel_build_trees(n_trees, forest, X, y, sample_weight, sample_mask, X_argsorted, seed, verbose):
    """Private function used to build a batch of trees within a job"""
    # NOTE(review): this targets a legacy scikit-learn tree API —
    # `sample_mask`, `X_argsorted`, `check_input` and `compute_importances`
    # were removed from modern releases, and `xrange` makes it
    # Python-2-only. Verify against the pinned sklearn version.
    from sklearn.utils import check_random_state
    from sklearn.utils.fixes import bincount
    import random
    MAX_INT = numpy.iinfo(numpy.int32).max
    random_state = check_random_state(seed)
    trees = []
    for i in xrange(n_trees):
        if verbose > 1:
            print("building tree %d of %d" % (i+1, n_trees))
        # Draw a fresh per-tree seed from the job-level random state.
        seed = random_state.randint(MAX_INT)
        tree = forest._make_estimator(append = False)
        tree.set_params(compute_importances=forest.compute_importances)
        tree.set_params(random_state = check_random_state(seed))
        if forest.bootstrap:
            n_samples = X.shape[0]
            if sample_weight is None:
                curr_sample_weight = numpy.ones((n_samples,), dtype=numpy.float64)
            else:
                curr_sample_weight = sample_weight.copy()
            # "Balanced" bootstrap: FilterData presumably selects a
            # class-balanced subset (val=1 in column 1, frac=0.5) — TODO
            # confirm against DataUtils — which is then resampled with
            # replacement below.
            ty = list(enumerate(y))
            indices = DataUtils.FilterData(ty, val=1, frac=0.5, col=1, indicesToUse=0, indicesOnly=1)[0]
            indices2 = random_state.randint(0, len(indices), len(indices))
            indices = [indices[j] for j in indices2]
            # In-bag multiplicity scales each sample's weight; samples
            # never drawn get weight 0 and are masked out of the fit.
            sample_counts = bincount(indices, minlength=n_samples)
            curr_sample_weight *= sample_counts
            curr_sample_mask = sample_mask.copy()
            curr_sample_mask[sample_counts==0] = False
            tree.fit(X, y, sample_weight=curr_sample_weight, sample_mask=curr_sample_mask, X_argsorted=X_argsorted, check_input=False)
            # Remember which samples were in-bag for this tree (used for
            # out-of-bag bookkeeping by the caller).
            tree.indices = curr_sample_mask
        else:
            tree.fit(X, y, sample_weight=sample_weight, sample_mask=sample_mask, X_argsorted=X_argsorted, check_input=False)
        trees.append(tree)
    return trees
示例11: _recompute_centers
def _recompute_centers( X, labels, n_clusters):
"""
Computation of cluster centers / means.
Parameters
----------
X: array-like, shape (n_samples, n_features)
labels: array of integers, shape (n_samples)
Current label assignment
n_clusters: int
Number of desired clusters
Returns
-------
centers: array, shape (n_clusters, n_features)
The resulting centers
"""
n_samples = X.shape[0]
n_features = X.shape[1]
# Initialize centers to all zero
centers = np.zeros((n_clusters, n_features))
n_samples_in_cluster = bincount(labels, minlength=n_clusters)
# Compute a center for each label
# For each label, average over samples and features
#TODO: IMPLEMENT
# 1. For each sample
# 2. What label is it? Let's say its label is 'label'
# 3. Add feature X's feature i to centers[label] feature value i
# Normalize by the size of the cluster
centers /= n_samples_in_cluster[:, np.newaxis]
return centers
示例12: _iter_indices
def _iter_indices(self):
rng = np.random.RandomState(self.random_state)
cls_count = bincount(self.y_indices)
for n in range(self.n_iter):
train = []
test = []
for i, cls in enumerate(self.classes):
sample_size = int(cls_count[i]*(1-self.test_size))
randint = rng.randint(cls_count[i], size=sample_size)
aidx = np.where((self.y == cls))[0]
iidx = aidx[randint]
oidx = aidx[list(set(range(cls_count[i])).difference(set(randint)))]
train.extend(iidx)
test.extend(oidx)
train = rng.permutation(train)
test = rng.permutation(test)
yield train, test
示例13: _recompute_centers
def _recompute_centers( X, labels, n_clusters):
"""
Computation of cluster centers / means.
Parameters
----------
X: array-like, shape (n_samples, n_features)
labels: array of integers, shape (n_samples)
Current label assignment
n_clusters: int
Number of desired clusters
Returns
-------
centers: array, shape (n_clusters, n_features)
The resulting centers
"""
n_samples = X.shape[0]
n_features = X.shape[1]
# Initialize centers to all zero
centers = np.zeros((n_clusters, n_features))
n_samples_in_cluster = bincount(labels, minlength=n_clusters)
# Compute a center for each label
# For each label, average over samples and features
#TODO: IMPLEMENT
for i in range(n_samples):
for j in range(n_features):
centers[labels[i], j] += X[i, j]
# Normalize by the size of the cluster
centers /= n_samples_in_cluster[:, np.newaxis]
return centers
示例14: test_sample_weight
def test_sample_weight():
    """Check sample weighting."""
    # Zero-weighted samples must not influence the fitted model at all.
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0
    weights = np.ones(100)
    weights[y == 0] = 0.0
    clf = tree.DecisionTreeClassifier()
    clf.fit(X, y, sample_weight=weights)
    assert_array_equal(clf.predict(X), np.ones(100))

    # At depth 1 the single split tracks whichever class carries more
    # total weight.
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200
    weights = np.ones(200)
    weights[y == 2] = .51  # class '2' is (barely) the heaviest
    clf = tree.DecisionTreeClassifier(max_depth=1)
    clf.fit(X, y, sample_weight=weights)
    assert_equal(clf.tree_.threshold[0], 149.5)

    weights[y == 2] = .50  # class '2' no longer dominates
    clf = tree.DecisionTreeClassifier(max_depth=1)
    clf.fit(X, y, sample_weight=weights)
    assert_equal(clf.tree_.threshold[0], 49.5)  # split moves

    # Integer sample weights are equivalent to duplicating the rows.
    X = iris.data
    y = iris.target
    duplicates = rng.randint(0, X.shape[0], 1000)
    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])
    from sklearn.utils.fixes import bincount
    weights = bincount(duplicates, minlength=X.shape[0])
    clf2 = tree.DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=weights)
    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_equal(clf.tree_.threshold[internal],
                       clf2.tree_.threshold[internal])

    # All-negative weights are rejected outright...
    X = iris.data
    y = iris.target
    weights = -np.ones(X.shape[0])
    clf = tree.DecisionTreeClassifier(random_state=1)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=weights)

    # ...but a single negative weight among positive ones is accepted.
    weights = np.ones(X.shape[0])
    weights[0] = -1
    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X, y, sample_weight=weights)

    # predict_proba must remain a valid distribution even when some
    # sample weights are negative.
    X = iris.data
    y = iris.target
    weights = rng.normal(.5, 1.0, X.shape[0])
    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X, y, sample_weight=weights)
    proba = clf.predict_proba(X)
    assert (proba >= 0).all() and (proba <= 1).all()
示例15: sensitivity_specificity_support
#.........这里部分代码省略.........
raise ValueError("Target is %s but average='binary'. Please "
"choose another average setting." % y_type)
elif pos_label not in (None, 1):
warnings.warn("Note that pos_label (set to %r) is ignored when "
"average != 'binary' (got %r). You may use "
"labels=[pos_label] to specify a single positive class."
% (pos_label, average), UserWarning)
if labels is None:
labels = present_labels
n_labels = None
else:
n_labels = len(labels)
labels = np.hstack(
[labels, np.setdiff1d(
present_labels, labels, assume_unique=True)])
# Calculate tp_sum, pred_sum, true_sum ###
if y_type.startswith('multilabel'):
raise ValueError('imblearn does not support multilabel')
elif average == 'samples':
raise ValueError("Sample-based precision, recall, fscore is "
"not meaningful outside multilabel "
"classification. See the accuracy_score instead.")
else:
le = LabelEncoder()
le.fit(labels)
y_true = le.transform(y_true)
y_pred = le.transform(y_pred)
sorted_labels = le.classes_
# labels are now from 0 to len(labels) - 1 -> use bincount
tp = y_true == y_pred
tp_bins = y_true[tp]
if sample_weight is not None:
tp_bins_weights = np.asarray(sample_weight)[tp]
else:
tp_bins_weights = None
if len(tp_bins):
tp_sum = bincount(
tp_bins, weights=tp_bins_weights, minlength=len(labels))
else:
# Pathological case
true_sum = pred_sum = tp_sum = np.zeros(len(labels))
if len(y_pred):
pred_sum = bincount(
y_pred, weights=sample_weight, minlength=len(labels))
if len(y_true):
true_sum = bincount(
y_true, weights=sample_weight, minlength=len(labels))
# Compute the true negative
tn_sum = y_true.size - (pred_sum + true_sum - tp_sum)
# Retain only selected labels
indices = np.searchsorted(sorted_labels, labels[:n_labels])
tp_sum = tp_sum[indices]
true_sum = true_sum[indices]
pred_sum = pred_sum[indices]
tn_sum = tn_sum[indices]
if average == 'micro':
tp_sum = np.array([tp_sum.sum()])
pred_sum = np.array([pred_sum.sum()])
true_sum = np.array([true_sum.sum()])
tn_sum = np.array([tn_sum.sum()])
# Finally, we have all our sufficient statistics. Divide! #
with np.errstate(divide='ignore', invalid='ignore'):
# Divide, and on zero-division, set scores to 0 and warn:
# Oddly, we may get an "invalid" rather than a "divide" error
# here.
specificity = _prf_divide(tn_sum, tn_sum + pred_sum - tp_sum,
'specificity', 'predicted', average,
warn_for)
sensitivity = _prf_divide(tp_sum, true_sum, 'sensitivity', 'true',
average, warn_for)
# Average the results
if average == 'weighted':
weights = true_sum
if weights.sum() == 0:
return 0, 0, None
elif average == 'samples':
weights = sample_weight
else:
weights = None
if average is not None:
assert average != 'binary' or len(specificity) == 1
specificity = np.average(specificity, weights=weights)
sensitivity = np.average(sensitivity, weights=weights)
true_sum = None # return no support
return sensitivity, specificity, true_sum