

Python fixes.bincount Function Code Examples

This article collects typical usage examples of the Python function sklearn.utils.fixes.bincount. If you are wondering what exactly bincount does, how to call it, or what it looks like in real code, the curated examples below should help.


The following 15 code examples of the bincount function are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
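Before the examples, here is a minimal self-contained sketch of the function itself. In older scikit-learn releases, sklearn.utils.fixes.bincount was a thin compatibility wrapper around numpy.bincount (providing the minlength keyword on old NumPy versions); recent scikit-learn releases have removed it, so the import below hedges with a fallback to numpy.bincount, which behaves the same:

import numpy as np

try:
    # Present in older scikit-learn releases only.
    from sklearn.utils.fixes import bincount
except ImportError:
    # Modern scikit-learn dropped the wrapper; np.bincount is equivalent
    # and accepts the same weights= and minlength= keywords.
    bincount = np.bincount

labels = np.array([0, 1, 1, 3])
print(bincount(labels))                                # [1 2 0 1]
print(bincount(labels, minlength=6))                   # [1 2 0 1 0 0]
print(bincount(labels, weights=[0.5, 1.0, 1.0, 2.0]))  # [0.5, 2.0, 0.0, 2.0]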

Example 1: check_min_samples_leaf

def check_min_samples_leaf(name):
    X, y = hastie_X, hastie_y

    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value
    assert_raises(ValueError, ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError, ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4, "Failed with {0}".format(name))

    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1, "Failed with {0}".format(name))
Author: nelson-liu | Project: scikit-learn | Lines: 25 | Source: test_forest.py

Example 2: check_min_weight_fraction_leaf

def check_min_weight_fraction_leaf(name, X, y):
    # Test if leaves contain at least min_weight_fraction_leaf of the
    # training set
    ForestEstimator = FOREST_ESTIMATORS[name]
    rng = np.random.RandomState(0)
    weights = rng.rand(X.shape[0])
    total_weight = np.sum(weights)

    # test both DepthFirstTreeBuilder and BestFirstTreeBuilder
    # by setting max_leaf_nodes
    for max_leaf_nodes in (None, 1000):
        for frac in np.linspace(0, 0.5, 6):
            est = ForestEstimator(min_weight_fraction_leaf=frac,
                                  max_leaf_nodes=max_leaf_nodes,
                                  random_state=0)
            if isinstance(est, (RandomForestClassifier,
                                RandomForestRegressor)):
                est.bootstrap = False
            est.fit(X, y, sample_weight=weights)
            out = est.estimators_[0].tree_.apply(X)
            node_weights = bincount(out, weights=weights)
            # drop inner nodes
            leaf_weights = node_weights[node_weights != 0]
            assert_greater_equal(
                np.min(leaf_weights),
                total_weight * est.min_weight_fraction_leaf,
                "Failed with {0} "
                "min_weight_fraction_leaf={1}".format(
                    name, est.min_weight_fraction_leaf))
Author: EddieBurning | Project: scikit-learn | Lines: 29 | Source: test_forest.py

Example 3: _make_test_folds

    def _make_test_folds(self, X, y=None, groups=None):
        if self.shuffle:
            rng = check_random_state(self.random_state)
        else:
            rng = self.random_state
        y = np.asarray(y)
        n_samples = len(X)
        y = ','.join(y).split(',')
        unique_y, y_inversed = np.unique(y, return_inverse=True)
        y_counts = bincount(y_inversed)
        min_groups = np.min(y_counts)
        if np.all(self.n_splits > y_counts):
            raise ValueError("All the n_groups for individual classes"
                             " are less than n_splits=%d."
                             % (self.n_splits))
        if self.n_splits > min_groups:
            warnings.warn(("The least populated class in y has only %d"
                           " members, which is too few. The minimum"
                           " number of groups for any class cannot"
                           " be less than n_splits=%d."
                           % (min_groups, self.n_splits)), Warning)

        # pre-assign each sample to a test fold index using individual KFold
        # splitting strategies for each class so as to respect the balance of
        # classes

        # NOTE: Passing the data corresponding to the i-th class, say
        # X[y == class_i], will break when the data is not 100% stratifiable
        # for all classes. So we pass np.zeros(max(c, n_splits)) as data to
        # the KFold
        test_folds = iterative_stratification(X, set(y), self.n_splits, rng)
        return test_folds
Author: daniaki | Project: ppi_wrangler | Lines: 31 | Source: cross_validation.py

Example 4: _recompute_centers

def _recompute_centers( X, labels, n_clusters):
    """
    Computation of cluster centers / means.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)

    labels: array of integers, shape (n_samples)
        Current label assignment

    n_clusters: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (n_clusters, n_features)
        The resulting centers
    """

    n_samples = X.shape[0]
    n_features = X.shape[1]
   
    # Initialize centers to all zero
    centers = np.zeros((n_clusters, n_features))
    n_samples_in_cluster = bincount(labels, minlength=n_clusters)

    # Compute a center for each label: take all of the samples assigned to
    # each cluster and average their features (a vectorized equivalent of
    # the per-sample loop in Example 13).
    np.add.at(centers, labels, X)

    # Normalize by the size of each cluster
    centers /= n_samples_in_cluster[:, np.newaxis]

    return centers
Author: AkiraKaneshiro | Project: gadsdc | Lines: 33 | Source: kmeans_exercise.py

Example 5: _generate_unsampled_indices

def _generate_unsampled_indices(random_state, n_samples):
    '''Samples out of bag'''
    sample_indices = _generate_sample_indices(random_state, n_samples)
    sample_counts = bincount(sample_indices, minlength=n_samples)
    unsampled_mask = sample_counts == 0
    indices_range = np.arange(n_samples)
    unsampled_indices = indices_range[unsampled_mask]
    return unsampled_indices
Author: thomasbrawner | Project: regime_failure | Lines: 8 | Source: oob_validation.py
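The helper _generate_sample_indices called above is not part of the snippet. In scikit-learn's forest module of that era it draws a bootstrap sample with replacement; a minimal reconstruction (an assumption based on the released scikit-learn code, not the verbatim library source) looks like this:

import numpy as np
from sklearn.utils import check_random_state

def _generate_sample_indices(random_state, n_samples):
    # Draw n_samples indices with replacement: some rows repeat, and the
    # rows never drawn (count 0 in the bincount above) form the OOB set.
    random_instance = check_random_state(random_state)
    return random_instance.randint(0, n_samples, n_samples)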

Example 6: entropy

    def entropy(samples):
        n_samples = len(samples)
        entropy = 0.

        for count in bincount(samples):
            p = 1. * count / n_samples
            if p > 0:
                entropy -= p * np.log2(p)

        return entropy
Author: EddieBurning | Project: scikit-learn | Lines: 10 | Source: test_forest.py
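A quick usage sketch (a hypothetical call, assuming the nested entropy helper above is in scope): with integer class labels, bincount yields the class counts, and the result is the Shannon entropy of the label distribution in bits.

import numpy as np

samples = np.array([0, 1, 0, 1])  # two classes, 50/50 split
# p = [0.5, 0.5], so entropy = -(0.5*log2(0.5) + 0.5*log2(0.5)) = 1.0 bit
print(entropy(samples))  # 1.0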

Example 7: test_sample_weight

def test_sample_weight():
    """Check sample weighting."""
    # Test that zero-weighted samples are not taken into account
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    sample_weight = np.ones(100)
    sample_weight[y == 0] = 0.0

    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_array_equal(clf.predict(X), np.ones(100))

    # Test that low weighted samples are not taken into account at low depth
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200

    sample_weight = np.ones(200)

    sample_weight[y == 2] = .51  # Samples of class '2' are still weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 149.5)

    sample_weight[y == 2] = .50  # Samples of class '2' are no longer weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 49.5)  # Threshold should have moved

    # Test that sample weighting is the same as having duplicates
    X = iris.data
    y = iris.target

    duplicates = rng.randint(0, X.shape[0], 200)

    clf = DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])

    sample_weight = bincount(duplicates, minlength=X.shape[0])
    clf2 = DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=sample_weight)

    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_almost_equal(clf.tree_.threshold[internal],
                              clf2.tree_.threshold[internal])
Author: Niteloser | Project: scikit-learn | Lines: 49 | Source: test_tree.py

Example 8: _recompute_centers

def _recompute_centers( X, labels, n_clusters):
    """
    Computation of cluster centers / means.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)

    labels: array of integers, shape (n_samples)
        Current label assignment

    n_clusters: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (n_clusters, n_features)
        The resulting centers
    """

    n_samples = X.shape[0]
    n_features = X.shape[1]
   
    # Initialize centers to all zero
    centers = np.zeros((n_clusters, n_features))
    n_samples_in_cluster = bincount(labels, minlength=n_clusters)


    # Compute a center for each label: for each sample, look up its label
    # and add the sample's feature vector to that label's running sum.
    for sample_idx in range(n_samples):
        label = labels[sample_idx]
        centers[label] += X[sample_idx]


    # Normalize by the size of the cluster
    centers /= n_samples_in_cluster[:, np.newaxis]

    return centers
Author: vijayvenkatesh | Project: datascienceplusMLcode | Lines: 47 | Source: kmeans_exercise.py

Example 9: check_min_samples_leaf

def check_min_samples_leaf(name, X, y):
    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test both DepthFirstTreeBuilder and BestFirstTreeBuilder
    # by setting max_leaf_nodes
    for max_leaf_nodes in (None, 1000):
        est = ForestEstimator(min_samples_leaf=5,
                              max_leaf_nodes=max_leaf_nodes,
                              random_state=0)
        est.fit(X, y)
        out = est.estimators_[0].tree_.apply(X)
        node_counts = bincount(out)
        # drop inner nodes
        leaf_count = node_counts[node_counts != 0]
        assert_greater(np.min(leaf_count), 4,
                       "Failed with {0}".format(name))
Author: EddieBurning | Project: scikit-learn | Lines: 17 | Source: test_forest.py

Example 10: _balanced_parallel_build_trees

def _balanced_parallel_build_trees(n_trees, forest, X, y, sample_weight, sample_mask, X_argsorted, seed, verbose):
    """Private function used to build a batch of trees within a job"""
    from sklearn.utils import check_random_state
    from sklearn.utils.fixes import bincount
    MAX_INT = numpy.iinfo(numpy.int32).max
    random_state = check_random_state(seed)

    trees = []
    for i in range(n_trees):
        if verbose > 1:
            print("building tree %d of %d" % (i + 1, n_trees))
        seed = random_state.randint(MAX_INT)

        tree = forest._make_estimator(append=False)
        tree.set_params(compute_importances=forest.compute_importances)
        tree.set_params(random_state=check_random_state(seed))

        if forest.bootstrap:
            n_samples = X.shape[0]
            if sample_weight is None:
                curr_sample_weight = numpy.ones((n_samples,), dtype=numpy.float64)
            else:
                curr_sample_weight = sample_weight.copy()

            ty = list(enumerate(y))
            indices = DataUtils.FilterData(ty, val=1, frac=0.5, col=1, indicesToUse=0, indicesOnly=1)[0]
            indices2 = random_state.randint(0, len(indices), len(indices))
            indices = [indices[j] for j in indices2]
            sample_counts = bincount(indices, minlength=n_samples)

            curr_sample_weight *= sample_counts
            curr_sample_mask = sample_mask.copy()
            curr_sample_mask[sample_counts == 0] = False

            tree.fit(X, y, sample_weight=curr_sample_weight, sample_mask=curr_sample_mask, X_argsorted=X_argsorted, check_input=False)
            tree.indices = curr_sample_mask
        else:
            tree.fit(X, y, sample_weight=sample_weight, sample_mask=sample_mask, X_argsorted=X_argsorted, check_input=False)
        trees.append(tree)
    return trees
Author: ryancoleman | Project: TDT-tutorial-2014 | Lines: 41 | Source: random_forest_functions.py

Example 11: _recompute_centers

def _recompute_centers( X, labels, n_clusters):
    """
    Computation of cluster centers / means.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)

    labels: array of integers, shape (n_samples)
        Current label assignment

    n_clusters: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (n_clusters, n_features)
        The resulting centers
    """

    n_samples = X.shape[0]
    n_features = X.shape[1]
   
    # Initialize centers to all zero
    centers = np.zeros((n_clusters, n_features))
    n_samples_in_cluster = bincount(labels, minlength=n_clusters)


    # Compute a center for each label, following the template's own steps:
    # 1. For each sample
    # 2. Look up its label, say 'label'
    # 3. Add the sample's feature vector to centers[label]
    # (Same per-sample loop as the finished version in Example 13.)
    for sample_idx in range(n_samples):
        label = labels[sample_idx]
        centers[label] += X[sample_idx]

    # Normalize by the size of the cluster
    centers /= n_samples_in_cluster[:, np.newaxis]

    return centers
Author: asbhat | Project: Data-Science-Class | Lines: 39 | Source: template_kmeans_exercise.py

Example 12: _iter_indices

    def _iter_indices(self):
        rng = np.random.RandomState(self.random_state)
        cls_count = bincount(self.y_indices)

        for n in range(self.n_iter):
            train = []
            test = []

            for i, cls in enumerate(self.classes):
                sample_size = int(cls_count[i]*(1-self.test_size))
                randint = rng.randint(cls_count[i], size=sample_size)
                aidx = np.where((self.y == cls))[0]
                iidx = aidx[randint]
                oidx = aidx[list(set(range(cls_count[i])).difference(set(randint)))]

                train.extend(iidx)
                test.extend(oidx)

            train = rng.permutation(train)
            test = rng.permutation(test)

            yield train, test
Author: Anhmike | Project: Kaggle_HomeDepot | Lines: 22 | Source: extreme_ensemble_selection.py

Example 13: _recompute_centers

def _recompute_centers( X, labels, n_clusters):
    """
    Computation of cluster centers / means.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)

    labels: array of integers, shape (n_samples)
        Current label assignment

    n_clusters: int
        Number of desired clusters

    Returns
    -------
    centers: array, shape (n_clusters, n_features)
        The resulting centers
    """

    n_samples = X.shape[0]
    n_features = X.shape[1]
   
    # Initialize centers to all zero
    centers = np.zeros((n_clusters, n_features))
    n_samples_in_cluster = bincount(labels, minlength=n_clusters)


    # Compute a center for each label: sum each sample's features into its
    # cluster's row, then average by cluster size below.
    for i in range(n_samples):
        for j in range(n_features):
            centers[labels[i], j] += X[i, j]

    # Normalize by the size of the cluster
    centers /= n_samples_in_cluster[:, np.newaxis]

    return centers
Author: GusSand | Project: GADS7 | Lines: 39 | Source: kmeans_exercise_final.py

Example 14: test_sample_weight

def test_sample_weight():
    """Check sample weighting."""
    # Test that zero-weighted samples are not taken into account
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    sample_weight = np.ones(100)
    sample_weight[y == 0] = 0.0

    clf = tree.DecisionTreeClassifier()
    clf.fit(X, y, sample_weight=sample_weight)
    assert_array_equal(clf.predict(X), np.ones(100))

    # Test that low weighted samples are not taken into account at low depth
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200

    sample_weight = np.ones(200)

    sample_weight[y == 2] = .51  # Samples of class '2' are still weightier
    clf = tree.DecisionTreeClassifier(max_depth=1)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 149.5)

    sample_weight[y == 2] = .50  # Samples of class '2' are no longer weightier
    clf = tree.DecisionTreeClassifier(max_depth=1)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 49.5)  # Threshold should have moved

    # Test that sample weighting is the same as having duplicates
    X = iris.data
    y = iris.target

    duplicates = rng.randint(0, X.shape[0], 1000)

    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])

    from sklearn.utils.fixes import bincount
    sample_weight = bincount(duplicates, minlength=X.shape[0])
    clf2 = tree.DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=sample_weight)

    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_equal(clf.tree_.threshold[internal],
                       clf2.tree_.threshold[internal])

    # Test negative weights
    X = iris.data
    y = iris.target

    sample_weight = -np.ones(X.shape[0])
    clf = tree.DecisionTreeClassifier(random_state=1)
    assert_raises(ValueError, clf.fit, X, y, sample_weight=sample_weight)

    sample_weight = np.ones(X.shape[0])
    sample_weight[0] = -1
    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)

    # Check that predict_proba returns valid probabilities in the presence of
    # samples with negative weight
    X = iris.data
    y = iris.target

    sample_weight = rng.normal(.5, 1.0, X.shape[0])
    clf = tree.DecisionTreeClassifier(random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)
    proba = clf.predict_proba(X)
    assert (proba >= 0).all() and (proba <= 1).all()
Author: Calvin-O | Project: scikit-learn | Lines: 74 | Source: test_tree.py

Example 15: sensitivity_specificity_support


#......... part of the code is omitted here .........
            raise ValueError("Target is %s but average='binary'. Please "
                             "choose another average setting." % y_type)
    elif pos_label not in (None, 1):
        warnings.warn("Note that pos_label (set to %r) is ignored when "
                      "average != 'binary' (got %r). You may use "
                      "labels=[pos_label] to specify a single positive class."
                      % (pos_label, average), UserWarning)

    if labels is None:
        labels = present_labels
        n_labels = None
    else:
        n_labels = len(labels)
        labels = np.hstack(
            [labels, np.setdiff1d(
                present_labels, labels, assume_unique=True)])

    # Calculate tp_sum, pred_sum, true_sum ###

    if y_type.startswith('multilabel'):
        raise ValueError('imblearn does not support multilabel')
    elif average == 'samples':
        raise ValueError("Sample-based precision, recall, fscore is "
                         "not meaningful outside multilabel "
                         "classification. See the accuracy_score instead.")
    else:
        le = LabelEncoder()
        le.fit(labels)
        y_true = le.transform(y_true)
        y_pred = le.transform(y_pred)
        sorted_labels = le.classes_

        # labels are now from 0 to len(labels) - 1 -> use bincount
        tp = y_true == y_pred
        tp_bins = y_true[tp]
        if sample_weight is not None:
            tp_bins_weights = np.asarray(sample_weight)[tp]
        else:
            tp_bins_weights = None

        if len(tp_bins):
            tp_sum = bincount(
                tp_bins, weights=tp_bins_weights, minlength=len(labels))
        else:
            # Pathological case
            true_sum = pred_sum = tp_sum = np.zeros(len(labels))
        if len(y_pred):
            pred_sum = bincount(
                y_pred, weights=sample_weight, minlength=len(labels))
        if len(y_true):
            true_sum = bincount(
                y_true, weights=sample_weight, minlength=len(labels))

        # Compute the true negative
        tn_sum = y_true.size - (pred_sum + true_sum - tp_sum)

        # Retain only selected labels
        indices = np.searchsorted(sorted_labels, labels[:n_labels])
        tp_sum = tp_sum[indices]
        true_sum = true_sum[indices]
        pred_sum = pred_sum[indices]
        tn_sum = tn_sum[indices]

    if average == 'micro':
        tp_sum = np.array([tp_sum.sum()])
        pred_sum = np.array([pred_sum.sum()])
        true_sum = np.array([true_sum.sum()])
        tn_sum = np.array([tn_sum.sum()])

    # Finally, we have all our sufficient statistics. Divide! #

    with np.errstate(divide='ignore', invalid='ignore'):
        # Divide, and on zero-division, set scores to 0 and warn:

        # Oddly, we may get an "invalid" rather than a "divide" error
        # here.
        specificity = _prf_divide(tn_sum, tn_sum + pred_sum - tp_sum,
                                  'specificity', 'predicted', average,
                                  warn_for)
        sensitivity = _prf_divide(tp_sum, true_sum, 'sensitivity', 'true',
                                  average, warn_for)

    # Average the results

    if average == 'weighted':
        weights = true_sum
        if weights.sum() == 0:
            return 0, 0, None
    elif average == 'samples':
        weights = sample_weight
    else:
        weights = None

    if average is not None:
        assert average != 'binary' or len(specificity) == 1
        specificity = np.average(specificity, weights=weights)
        sensitivity = np.average(sensitivity, weights=weights)
        true_sum = None  # return no support

    return sensitivity, specificity, true_sum
Author: kellyhennigan | Project: cueexp_scripts | Lines: 101 | Source: classification.py
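The function above is shown with its opening lines omitted (see the marker at the top of the example); it corresponds to the sensitivity_specificity_support metric shipped in imbalanced-learn. A hedged usage sketch against the released imblearn.metrics API (the toy labels are illustrative):

import numpy as np
from imblearn.metrics import sensitivity_specificity_support

y_true = np.array([0, 0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1, 0])
# With average='macro' the per-class scores are averaged and the returned
# support is None.
sen, spe, sup = sensitivity_specificity_support(y_true, y_pred,
                                                average='macro')
print(sen, spe, sup)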


Note: The sklearn.utils.fixes.bincount examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For redistribution and use, refer to the license of the corresponding project; please do not reproduce without permission.