当前位置: 首页>>代码示例>>Python>>正文


Python utils.safe_indexing函数代码示例

本文整理汇总了Python中sklearn.utils.safe_indexing函数的典型用法代码示例。如果您正苦于以下问题:Python safe_indexing函数的具体用法?Python safe_indexing怎么用?Python safe_indexing使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了safe_indexing函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sample_data

def sample_data(data, train_idx, test_idx):
    """Build train/test sub-samples of ``data.train`` from index collections.

    Parameters
    ----------
    data : Bunch-like
        Must expose ``data.train`` with the attributes ``target``, ``bow``,
        ``snippets``, ``sizes`` and ``snippet_cost``. ``data.test`` is
        returned unchanged when ``test_idx`` is empty.
    train_idx : sequence of indices
        Rows of ``data.train`` that form the training sample.
    test_idx : sequence of indices
        Rows of ``data.train`` that form the test sample. When empty, the
        original ``data.test`` is used instead.

    Returns
    -------
    tuple
        ``(train, test)`` where ``train`` is a new Bunch and ``test`` is
        either a new Bunch or ``data.test``.
    """
    sample = bunch.Bunch(train=bunch.Bunch(), test=bunch.Bunch(), target_names=None)

    # NOTE: ``target_names`` and the raw ``data`` field are not populated here.
    sample.train.target = safe_indexing(data.train.target, train_idx)
    sample.train.bow = safe_indexing(data.train.bow, train_idx)
    sample.train.remaining = []
    sample.train.validation = []
    sample.train.revisit = []

    sample.train.snippets = safe_indexing(data.train.snippets, train_idx)
    sample.train.sizes = safe_indexing(data.train.sizes, train_idx)
    sample.train.snippet_cost = safe_indexing(data.train.snippet_cost, train_idx)

    # len() rather than truthiness: test_idx may be a numpy array.
    if len(test_idx) > 0:
        # Every test field must be selected with test_idx so that the rows of
        # bow/snippets/sizes/snippet_cost line up with sample.test.target.
        sample.test.target = safe_indexing(data.train.target, test_idx)
        sample.test.bow = safe_indexing(data.train.bow, test_idx)
        sample.test.snippets = safe_indexing(data.train.snippets, test_idx)
        sample.test.sizes = safe_indexing(data.train.sizes, test_idx)
        sample.test.snippet_cost = safe_indexing(data.train.snippet_cost, test_idx)
    else:
        sample.test = data.test

    return sample.train, sample.test
开发者ID:mramire8,项目名称:utility-based,代码行数:29,代码来源:experimentutils.py

示例2: _safe_split

def _safe_split(estimator, X, y, indices, train_indices=None):
    """Create subset of dataset and properly handle kernels."""
    if hasattr(estimator, 'kernel') and callable(estimator.kernel):
        # cannot compute the kernel values with custom function
        raise ValueError("Cannot use a custom kernel function. "
                         "Precompute the kernel matrix instead.")

    if not hasattr(X, "shape"):
        if getattr(estimator, "_pairwise", False):
            raise ValueError("Precomputed kernels or affinity matrices have "
                             "to be passed as arrays or sparse matrices.")
        X_subset = [X[idx] for idx in indices]
    else:
        if getattr(estimator, "_pairwise", False):
            # X is a precomputed square kernel matrix
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square kernel matrix")
            if train_indices is None:
                X_subset = X[np.ix_(indices, indices)]
            else:
                X_subset = X[np.ix_(indices, train_indices)]
        else:
            X_subset = safe_indexing(X, indices)

    if y is not None:
        y_subset = safe_indexing(y, indices)
    else:
        y_subset = None

    return X_subset, y_subset
开发者ID:vene,项目名称:ambra,代码行数:30,代码来源:cross_validation.py

示例3: _safe_split

def _safe_split(depthmaps, offset_points_projected, direction_vectors, true_joints, indices):
    """Select the same ``indices`` from each of the four parallel inputs.

    Returns a 4-tuple ``(depth, offsets, directions, truths)`` holding the
    ``safe_indexing`` result for each argument, in argument order.
    """
    parallel_inputs = (depthmaps, offset_points_projected,
                       direction_vectors, true_joints)
    return tuple(safe_indexing(values, indices) for values in parallel_inputs)
开发者ID:aoikaneko,项目名称:RandomTreeWalk,代码行数:7,代码来源:grid_search.py

示例4: _split_fit_score_trial

    def _split_fit_score_trial(self, X, y, idx=0):
        """
        Run one trial: split the data, fit a fresh clone of the estimator on
        every training split and yield the threshold metrics measured on the
        matching test split.

        When ``self.random_state`` is not None, ``idx`` is added to it before
        the splitter is built, so each trial is shuffled differently while
        remaining reproducible.

        Yields one dict per split with the keys ``thresholds``,
        ``precision``, ``recall``, ``fscore`` and ``queue_rate``.
        """
        seed = self.random_state
        if seed is not None:
            seed += idx

        cv = self._check_cv(self.cv, seed)

        for train_rows, test_rows in cv.split(X, y):
            # safe_indexing copes with several container types (DataFrames,
            # structured arrays, ...), which generic splits require.
            X_train = safe_indexing(X, train_rows)
            y_train = safe_indexing(y, train_rows)
            X_test = safe_indexing(X, test_rows)
            y_test = safe_indexing(y, test_rows)

            fitted = clone(self.estimator)
            fitted.fit(X_train, y_train)

            if not hasattr(fitted, "predict_proba"):
                # No probabilities available: fall back to decision scores.
                scores = fitted.decision_function(X_test)
            else:
                # Column 1 holds the positive-class probability.
                scores = fitted.predict_proba(X_test)[:, 1]

            precision, recall, thresholds = precision_recall_curve(
                y_test, scores)

            # F-beta from precision and recall; division warnings are
            # silenced because precision/recall would already have warned.
            with np.errstate(divide='ignore', invalid='ignore'):
                beta_sq = self.fbeta ** 2
                numerator = (1 + beta_sq) * precision * recall
                f_score = numerator / (beta_sq * precision + recall)

            # Make the threshold axis end at 1.
            thresholds = np.append(thresholds, 1)

            # Queue rate: share of test scores at or above each threshold.
            queue_rate = np.array(
                [(scores >= cutoff).mean() for cutoff in thresholds])

            yield dict(
                thresholds=thresholds,
                precision=precision,
                recall=recall,
                fscore=f_score,
                queue_rate=queue_rate,
            )
开发者ID:DistrictDataLabs,项目名称:yellowbrick,代码行数:60,代码来源:threshold.py

示例5: test_safe_indexing_mock_pandas

def test_safe_indexing_mock_pandas():
    """Indexing a MockDataFrame must match indexing the underlying ndarray."""
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Index the plain ndarray for the reference value; indexing X_df again
    # would compare the frame result with itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
开发者ID:allefpablo,项目名称:scikit-learn,代码行数:7,代码来源:test_utils.py

示例6: _fit_resample

    def _fit_resample(self, X, y):
        """Randomly under-sample the classes listed in ``sampling_strategy_``.

        For every class present in ``y``: if the class is a key of
        ``self.sampling_strategy_``, that many of its samples are drawn
        (with replacement when ``self.replacement`` is true); otherwise all
        of its samples are kept. The selected positions are concatenated
        class by class, in ``np.unique(y)`` order.

        Returns ``(X_resampled, y_resampled)``, plus the array of selected
        positions as a third element when ``self.return_indices`` is true.
        """
        rng = check_random_state(self.random_state)

        selected = np.empty((0, ), dtype=int)

        for label in np.unique(y):
            if label in self.sampling_strategy_:
                wanted = self.sampling_strategy_[label]
                # Positions are drawn relative to the class, then mapped to
                # absolute positions below.
                within_class = rng.choice(
                    range(np.count_nonzero(y == label)),
                    size=wanted,
                    replace=self.replacement)
            else:
                within_class = slice(None)

            class_positions = np.flatnonzero(y == label)
            selected = np.concatenate(
                (selected, class_positions[within_class]), axis=0)

        resampled = (safe_indexing(X, selected), safe_indexing(y, selected))
        if self.return_indices:
            return resampled + (selected,)
        return resampled
开发者ID:bodycat,项目名称:imbalanced-learn,代码行数:25,代码来源:_random_under_sampler.py

示例7: _fit_resample

    def _fit_resample(self, X, y):
        """Over-sample ``X``/``y`` after binning ``y`` by its z-score.

        ``y`` is standardised, then each sample receives a temporary label:
        0 when its z-score is above ``self.negative_thres``, 1 when it is at
        or below ``self.positive_thres`` (1 wins if both apply), and -1
        otherwise. A ``RandomOverSampler`` is fitted on ``X`` and these
        labels, and its ``sample_indices_`` select rows from the original
        ``X`` and ``y``.

        Side effects: prints the label counts and the shape of the selected
        indices, and stores the indices in ``self.sample_indices_``.

        Returns ``(X_resampled, y_resampled)``, plus the selected indices as
        a third element when ``self.return_indices`` is true.
        """
        n_samples = X.shape[0]

        # convert y to z_score
        y_z = (y - y.mean()) / y.std()

        index0 = np.arange(n_samples)
        # Compute both masks before y_z is overwritten with labels.
        negative_mask = y_z > self.negative_thres
        positive_mask = y_z <= self.positive_thres
        index_negative = index0[negative_mask]
        index_positive = index0[positive_mask]
        # Vectorised complement of the two masks; replaces a per-element
        # ``x not in ndarray`` scan that was quadratic in n_samples.
        index_unclassified = index0[~(negative_mask | positive_mask)]

        y_z[index_negative] = 0
        y_z[index_positive] = 1
        y_z[index_unclassified] = -1

        ros = RandomOverSampler(
            sampling_strategy=self.sampling_strategy,
            random_state=self.random_state,
            ratio=self.ratio)
        # Only the sampler's chosen indices are needed, not its output.
        _, _ = ros.fit_resample(X, y_z)
        sample_indices = ros.sample_indices_

        print("Before sampler: %s. Total after: %s"
              % (Counter(y_z), sample_indices.shape))

        self.sample_indices_ = np.array(sample_indices)

        if self.return_indices:
            return (safe_indexing(X, sample_indices),
                    safe_indexing(y, sample_indices),
                    sample_indices)
        return (safe_indexing(X, sample_indices),
                safe_indexing(y, sample_indices))
开发者ID:bgruening,项目名称:galaxytools,代码行数:35,代码来源:preprocessors.py

示例8: test_safe_indexing

def test_safe_indexing():
    """safe_indexing on a nested list must agree with ndarray indexing."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    selection = np.array([1, 2])
    rows_as_array = np.array(rows)

    from_list = safe_indexing(rows, selection)
    from_array = safe_indexing(rows_as_array, selection)

    assert_array_equal(np.array(from_list), from_array)
    assert_array_equal(np.array(from_list), rows_as_array[selection])
开发者ID:allefpablo,项目名称:scikit-learn,代码行数:7,代码来源:test_utils.py

示例9: generate_train_set

    def generate_train_set(self, train_size=None, test_size=None, rand_state=None):
        """
        Split the probe and gallery file lists into train and test parts.

        When both ``train_size`` and ``test_size`` are None, nothing is put
        in the train part: every file goes to the test part and
        ``test_indexes`` covers all probe files. Otherwise a single
        ``ShuffleSplit`` draw over the probe files provides the indexes, and
        the same indexes are applied to both file lists.

        Sets ``files_train``/``files_test`` on ``self.probe`` and
        ``self.gallery``, plus ``self.train_indexes``, ``self.test_indexes``,
        ``self.train_size`` and ``self.test_size``.

        :param train_size: forwarded to ``ShuffleSplit`` (default None)
        :param test_size: forwarded to ``ShuffleSplit`` (default None)
        :param rand_state: forwarded to ``ShuffleSplit`` as ``random_state``
        :return: None
        """
        # self.probe.clear()
        # self.gallery.clear()

        if train_size is not None or test_size is not None:
            n_samples = len(self.probe.files)
            cv = ShuffleSplit(n_samples, test_size=test_size, train_size=train_size, random_state=rand_state)
            train_indexes, test_indexes = next(iter(cv))
            # Index both lists first, then assign, so nothing is updated if
            # any of the indexing calls fails.
            probe_parts, gallery_parts = [
                (safe_indexing(files, train_indexes),
                 safe_indexing(files, test_indexes))
                for files in [self.probe.files, self.gallery.files]
            ]
            self.probe.files_train, self.probe.files_test = probe_parts
            self.gallery.files_train, self.gallery.files_test = gallery_parts
            self.train_indexes, self.test_indexes = train_indexes, test_indexes
        else:
            self.probe.files_train, self.probe.files_test = [], self.probe.files
            self.gallery.files_train, self.gallery.files_test = [], self.gallery.files
            self.train_indexes = []
            self.test_indexes = list(range(len(self.probe.files)))

        self.train_size = len(self.train_indexes)
        self.test_size = len(self.test_indexes)
开发者ID:AShedko,项目名称:PyReID,代码行数:32,代码来源:dataset.py

示例10: _sample

    def _sample(self, X, y):
        """Resample the dataset class by class.

        Classes that are keys of ``self.ratio_`` are replaced by samples
        built from the cluster centers of ``self.estimator_`` (fitted with
        ``n_clusters`` set to the requested count); every other class is
        copied over unchanged.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : {ndarray, sparse matrix}, shape (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new,)
            The corresponding label of `X_resampled`

        Raises
        ------
        ValueError
            If ``self.voting`` is neither ``'auto'`` nor in ``VOTING_KIND``.

        """
        self._validate_estimator()

        # Resolve the effective voting mode.
        if self.voting == 'auto':
            self.voting_ = 'hard' if sparse.issparse(X) else 'soft'
        elif self.voting in VOTING_KIND:
            self.voting_ = self.voting
        else:
            raise ValueError("'voting' needs to be one of {}. Got {}"
                             " instead.".format(VOTING_KIND, self.voting))

        X_parts, y_parts = [], []
        for label in np.unique(y):
            if label in self.ratio_:
                n_clusters = self.ratio_[label]
                self.estimator_.set_params(n_clusters=n_clusters)
                self.estimator_.fit(X[y == label])
                X_part, y_part = self._generate_sample(
                    X, y, self.estimator_.cluster_centers_, label)
            else:
                # Class is not resampled: keep its rows as they are.
                keep = np.flatnonzero(y == label)
                X_part = safe_indexing(X, keep)
                y_part = safe_indexing(y, keep)
            X_parts.append(X_part)
            y_parts.append(y_part)

        stack_rows = sparse.vstack if sparse.issparse(X) else np.vstack
        X_resampled = stack_rows(X_parts)
        y_resampled = np.hstack(y_parts)

        return X_resampled, np.array(y_resampled)
开发者ID:glemaitre,项目名称:imbalanced-learn,代码行数:57,代码来源:cluster_centroids.py

示例11: test_safe_indexing_pandas

def test_safe_indexing_pandas():
    """Indexing a pandas DataFrame must match indexing the source ndarray.

    Skipped (via ``SkipTest``) when pandas cannot be imported.
    """
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    # Index the plain ndarray for the reference value; indexing X_df again
    # would compare the frame result with itself and could never fail.
    X_indexed = safe_indexing(X, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)
开发者ID:Flushot,项目名称:scikit-learn,代码行数:11,代码来源:test_utils.py

示例12: generator

 def generator(X, y, sample_weight, indices, batch_size):
     """Endlessly yield consecutive mini-batches selected by ``indices``.

     ``indices`` is walked in steps of ``batch_size``; once exhausted the
     walk restarts from the beginning. Each item is ``(X_batch, y_batch)``,
     or ``(X_batch, y_batch, weight_batch)`` when ``sample_weight`` is not
     None. A sparse ``X_batch`` is densified unless ``keep_sparse`` is
     true; ``keep_sparse`` is not a parameter and is taken from the
     enclosing scope.
     """
     while True:
         for start in range(0, len(indices), batch_size):
             batch = indices[start:start + batch_size]
             X_res = safe_indexing(X, batch)
             y_res = safe_indexing(y, batch)
             if issparse(X_res) and not keep_sparse:
                 X_res = X_res.toarray()
             if sample_weight is not None:
                 yield X_res, y_res, safe_indexing(sample_weight, batch)
             else:
                 yield X_res, y_res
开发者ID:chkoar,项目名称:imbalanced-learn,代码行数:13,代码来源:_generator.py

示例13: _sample

    def _sample(self, X, y):
        """Randomly under-sample the classes listed in ``self.ratio_``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : {ndarray, sparse matrix}, shape (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new,)
            The corresponding label of `X_resampled`

        idx_under : ndarray of int, shape (n_samples_new,)
            Only returned if `return_indices` is `True`: the positions in
            the input of the samples that were selected.

        """
        random_state = check_random_state(self.random_state)

        def positions_for(target_class):
            # Absolute positions kept for one class: a random draw when the
            # class has a target count in ratio_, every position otherwise.
            if target_class in self.ratio_:
                picks = random_state.choice(
                    range(np.count_nonzero(y == target_class)),
                    size=self.ratio_[target_class],
                    replace=self.replacement)
            else:
                picks = slice(None)
            return np.flatnonzero(y == target_class)[picks]

        idx_under = np.empty((0, ), dtype=int)
        for target_class in np.unique(y):
            idx_under = np.concatenate(
                (idx_under, positions_for(target_class)), axis=0)

        X_resampled = safe_indexing(X, idx_under)
        y_resampled = safe_indexing(y, idx_under)
        if not self.return_indices:
            return X_resampled, y_resampled
        return (X_resampled, y_resampled, idx_under)
开发者ID:glemaitre,项目名称:imbalanced-learn,代码行数:49,代码来源:random_under_sampler.py

示例14: _sample

    def _sample(self, X, y):
        """Append newly generated samples for each class to copies of X and y.

        For every ``(class, n_samples)`` pair in ``self.sampling_strategy_``
        with a non-zero count, the rows of that class are selected, their
        neighbours are looked up with ``self.nn_k_`` (dropping the first
        neighbour column), and ``self._make_samples`` produces the new rows
        that are stacked below the data gathered so far.

        Returns ``(X_resampled, y_resampled)``.
        """
        # FIXME: uncomment in version 0.6
        # self._validate_estimator()

        X_resampled, y_resampled = X.copy(), y.copy()

        for class_sample, n_samples in self.sampling_strategy_.items():
            if n_samples == 0:
                continue

            class_rows = np.flatnonzero(y == class_sample)
            X_class = safe_indexing(X, class_rows)

            self.nn_k_.fit(X_class)
            neighbours = self.nn_k_.kneighbors(X_class, return_distance=False)
            nns = neighbours[:, 1:]
            X_new, y_new = self._make_samples(X_class, y.dtype, class_sample,
                                              X_class, nns, n_samples, 1.0)

            if not sparse.issparse(X_new):
                X_resampled = np.vstack((X_resampled, X_new))
            else:
                stacked = sparse.vstack([X_resampled, X_new])
                # Convert back to CSC if X was CSC, to CSR in every other case.
                if X.format == 'csc':
                    X_resampled = stacked.tocsc()
                else:
                    X_resampled = stacked.tocsr()
            y_resampled = np.hstack((y_resampled, y_new))

        return X_resampled, y_resampled
开发者ID:scikit-learn-contrib,项目名称:imbalanced-learn,代码行数:27,代码来源:_smote.py

示例15: _index_param_value

def _index_param_value(X, v, indices):
    """Index a parameter value alongside ``X`` when it is sample-aligned.

    ``v`` is returned untouched unless it is array-like and has the same
    number of samples as ``X``. Otherwise it is indexed with ``indices``
    through ``safe_indexing``, after conversion to CSR if it is sparse.
    """
    sample_aligned = _is_arraylike(v) and _num_samples(v) == _num_samples(X)
    if not sample_aligned:
        # pass through: skip indexing
        return v
    indexable = v.tocsr() if sp.issparse(v) else v
    return safe_indexing(indexable, indices)
开发者ID:Meyenhofer,项目名称:pattern-recognition-2016,代码行数:8,代码来源:_validation.py


注:本文中的sklearn.utils.safe_indexing函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。