

Python KMeans.set_params Method Code Examples

This article collects and summarizes typical usage examples of the Python method sklearn.cluster.KMeans.set_params, drawn from open-source projects. If you are wondering what KMeans.set_params does, how exactly to call it, or want to see it used in real code, the curated examples below should help. You can also explore further usage examples of sklearn.cluster.KMeans itself.


The following presents 10 code examples of KMeans.set_params, ordered by popularity.
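Before diving into the examples, here is a minimal sketch of the set_params round trip on a bare KMeans instance. The parameter names (n_clusters, n_init, max_iter) are standard scikit-learn KMeans parameters; the random toy data is made up purely for illustration.

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(100, 4)  # toy data, illustration only

kmeans = KMeans()
kmeans.set_params(n_clusters=3, n_init=10, max_iter=300)  # returns self
labels = kmeans.fit_predict(X)
print(kmeans.get_params()['n_clusters'])  # -> 3
print(kmeans.cluster_centers_.shape)      # -> (3, 4)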

Example 1: categorise_dataset

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]
import numpy as np  # assumed import; the snippet below uses np
def categorise_dataset(contents):
    iris_setosa = []
    iris_versicolor = []
    iris_virginica = []
    for each_tuple in contents:
        if each_tuple[4] == 'Iris-virginica':
            iris_virginica.append(each_tuple[:4])
        elif each_tuple[4] == 'Iris-versicolor':
            iris_versicolor.append(each_tuple[:4])
        elif each_tuple[4] == 'Iris-setosa':
            iris_setosa.append(each_tuple[:4])

    kwargs = {
        'n_init': 5,
        'n_jobs': 3,  # depends on the number of cores on your machine
        'n_clusters': 3,
    }
    kmeans = KMeans()
    kmeans.set_params(**kwargs)
    # apply kmeans
    iris_setosa_centroids_indices = kmeans.fit_predict(np.array(iris_setosa))
    iris_setosa_centroids = kmeans.cluster_centers_

    iris_versicolor_centroids_indices = kmeans.fit_predict(np.array(iris_versicolor))
    iris_versicolor_centroids = kmeans.cluster_centers_

    iris_virginica_centroids_indices = kmeans.fit_predict(np.array(iris_virginica))
    iris_virginica_centroids = kmeans.cluster_centers_
    return (iris_setosa_centroids,
            iris_versicolor_centroids,
            iris_virginica_centroids)
Author: sreeram-boyapati, Project: general-codes, Lines: 34, Source: iris_kmeans.py
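As context for Example 1, the sketch below shows how such a function might be driven. It is not from the original project: the file name iris.data and the CSV parsing are assumptions based on the classic UCI iris layout (four float features followed by the species name).

import csv

# Hypothetical driver for categorise_dataset (assumed iris.data layout).
with open('iris.data') as f:  # path is an assumption
    contents = [(float(r[0]), float(r[1]), float(r[2]), float(r[3]), r[4])
                for r in csv.reader(f) if r]

setosa_c, versicolor_c, virginica_c = categorise_dataset(contents)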

Example 2: resample

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]
# (numpy's concatenate and ones are assumed imported at module level)
    def resample(self):
        '''Resample the dataset by replacing each majority class with the
        centroids of a KMeans fit on that class.

        :param ratio:
            The ratio of the number of majority-class cluster centroids
            with respect to the number of minority-class samples.
        :param n_jobs:
            Number of parallel jobs passed on to KMeans.
        :param kargs:
            Extra keyword arguments forwarded to KMeans.set_params.
        :return:
            The under-sampled data and labels.
        '''

        # Create the clustering object
        from sklearn.cluster import KMeans
        kmeans = KMeans(random_state=self.rs)
        kmeans.set_params(**self.kargs)

        # Start with the minority class
        underx = self.x[self.y == self.minc]
        undery = self.y[self.y == self.minc]

        # Loop over the other classes and under-sample each one
        print('Finding cluster centroids...', end="")
        for key in self.ucd.keys():
            # Skip the minority class itself
            if key == self.minc:
                continue

            # Set the number of clusters to be no more than the number of samples
            if self.ratio * self.ucd[self.minc] > self.ucd[key]:
                nclusters = self.ucd[key]
            else:
                nclusters = int(self.ratio * self.ucd[self.minc])

            # Set the number of clusters and find the centroids
            kmeans.set_params(n_clusters=nclusters)
            kmeans.fit(self.x[self.y == key])
            centroids = kmeans.cluster_centers_

            # Concatenate to the minority class
            underx = concatenate((underx, centroids), axis=0)
            undery = concatenate((undery, ones(nclusters) * key), axis=0)
            print(".", end="")

        print("done!")

        return underx, undery
Author: ajcobo, Project: tweetclassifier, Lines: 49, Source: UnbalancedDataset.py

Example 3: resample

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]
# (numpy's concatenate/ones and collections.Counter are assumed imported)
    def resample(self):
        """
        ???

        :return:
        """

        # Compute the ratio if it is auto
        if self.ratio == 'auto':
            self.ratio = 1.

        # Create the clustering object
        from sklearn.cluster import KMeans
        kmeans = KMeans(random_state=self.rs)
        kmeans.set_params(**self.kwargs)

        # Start with the minority class
        underx = self.x[self.y == self.minc]
        undery = self.y[self.y == self.minc]

        # Loop over the other classes and under-sample each one
        for key in self.ucd.keys():
            # Skip the minority class itself
            if key == self.minc:
                continue

            # Set the number of clusters to be no more than the number of
            # samples
            if self.ratio * self.ucd[self.minc] > self.ucd[key]:
                n_clusters = self.ucd[key]
            else:
                n_clusters = int(self.ratio * self.ucd[self.minc])

            # Set the number of clusters and find the centroids
            kmeans.set_params(n_clusters=n_clusters)
            kmeans.fit(self.x[self.y == key])
            centroids = kmeans.cluster_centers_

            # Concatenate to the minority class
            underx = concatenate((underx, centroids), axis=0)
            undery = concatenate((undery, ones(n_clusters) * key), axis=0)

        if self.verbose:
            print("Under-sampling performed: " + str(Counter(undery)))

        return underx, undery
Author: EliNok, Project: UnbalancedDataset, Lines: 48, Source: under_sampling.py
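Examples 2 and 3 implement the same idea: replace each majority class with KMeans centroids so that class counts match the minority class. A self-contained sketch of that technique with plain scikit-learn follows; function and variable names here are illustrative, not from either project.

import numpy as np
from sklearn.cluster import KMeans

def centroid_undersample(X, y, random_state=None):
    # Keep the minority class as-is; replace every other class by the
    # centroids of a KMeans fit with as many clusters as minority samples.
    classes, counts = np.unique(y, return_counts=True)
    minc = classes[np.argmin(counts)]
    n_min = counts.min()
    X_res, y_res = [X[y == minc]], [y[y == minc]]
    for key in classes:
        if key == minc:
            continue
        km = KMeans(n_clusters=n_min, random_state=random_state)
        km.fit(X[y == key])
        X_res.append(km.cluster_centers_)
        y_res.append(np.full(n_min, key))
    return np.concatenate(X_res), np.concatenate(y_res)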

Example 4: ClusterCentroids

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]

#......... part of the code omitted here .........
                 random_state=None,
                 estimator=None,
                 n_jobs=1):
        super(ClusterCentroids, self).__init__(
            ratio=ratio, random_state=random_state)
        self.estimator = estimator
        self.n_jobs = n_jobs

    def _validate_estimator(self):
        """Private function to create the NN estimator"""

        if self.estimator is None:
            self.estimator_ = KMeans(
                random_state=self.random_state, n_jobs=self.n_jobs)
        elif isinstance(self.estimator, KMeans):
            self.estimator_ = self.estimator
        else:
            raise ValueError('`estimator` has to be a KMeans clustering.')

    def fit(self, X, y):
        """Find the classes statistics before to perform sampling.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : ndarray, shape (n_samples, )
            Corresponding label for each sample in X.

        Returns
        -------
        self : object,
            Return self.

        """

        super(ClusterCentroids, self).fit(X, y)

        self._validate_estimator()

        return self

    def _sample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : ndarray, shape (n_samples, )
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : ndarray, shape (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new)
            The corresponding label of `X_resampled`

        """

        # Compute the number of clusters needed
        if self.ratio == 'auto':
            num_samples = self.stats_c_[self.min_c_]
        else:
            num_samples = int(self.stats_c_[self.min_c_] / self.ratio)

        # Set the number of clusters for the estimator
        self.estimator_.set_params(**{'n_clusters': num_samples})

        # Start with the minority class
        X_min = X[y == self.min_c_]
        y_min = y[y == self.min_c_]

        # All the minority class samples will be preserved
        X_resampled = X_min.copy()
        y_resampled = y_min.copy()

        # Loop over the other classes and under-sample each one
        for key in self.stats_c_.keys():

            # Skip the minority class itself
            if key == self.min_c_:
                continue

            # Find the centroids via k-means
            self.estimator_.fit(X[y == key])
            centroids = self.estimator_.cluster_centers_

            # Concatenate to the minority class
            X_resampled = np.concatenate((X_resampled, centroids), axis=0)
            y_resampled = np.concatenate(
                (y_resampled, np.array([key] * num_samples)), axis=0)

        self.logger.info('Under-sampling performed: %s', Counter(y_resampled))

        return X_resampled, y_resampled
Author: kellyhennigan, Project: cueexp_scripts, Lines: 104, Source: cluster_centroids.py

Example 5: DataCluster

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]
# (This example is Python 2 code: note the print statements and xrange.)
class DataCluster():

    def __init__(self, nCluster, minDist, nQuatCluster, minQuatDist):
        print 'Init DataCluster.'
        self.set_params(nCluster, minDist, nQuatCluster, minQuatDist)

    def set_params(self, nCluster, minDist, nQuatCluster, minQuatDist):
        self.nCluster = nCluster
        self.fMinDist = minDist

        self.nQuatCluster = nQuatCluster
        self.fMinQuatDist = minQuatDist
        
        self.ml = KMeans(n_clusters=nCluster, max_iter=300, n_jobs=6)
        
    def readData(self):
        print 'Read data manually.'
        data_start=0
        data_finish=1000 #'end'
        model = 'bed'
        subject='sub6_shaver'
        print 'Starting to convert data!'
        self.runData = dr.DataReader(subject=subject,data_start=data_start,data_finish=data_finish,model=model)      
        #dr_obs = dr.DataReader(subject=subject,data_start=data_start,data_finish=data_finish,model=model)        
        #self.runData = dr_obs.get_raw_data(self)

        
    def mat_to_pos_quat(self, raw_data):

        raw_pos  = np.zeros((len(raw_data),3)) #array
        raw_quat = np.zeros((len(raw_data),4))
        
        #-----------------------------------------------------------#
        ## Decompose data into pos,quat pairs
        for i in xrange(len(raw_data)):  
            raw_pos[i,:]  = np.array([raw_data[i][0,3],raw_data[i][1,3],raw_data[i][2,3]])
            raw_quat[i,:] = tft.quaternion_from_matrix(raw_data[i]) # order should be xyzw because ROS uses xyzw order.       

        return raw_pos, raw_quat

    def pos_clustering(self, raw_pos):

        while True:
            dict_params={}
            dict_params['n_clusters']=self.nCluster
            self.ml.set_params(**dict_params)
            self.ml.fit(raw_pos)

            # co-distance matrix
            bReFit = False
            co_pos_mat = np.zeros((self.nCluster,self.nCluster))
            for i in xrange(self.nCluster):

                # For refitting
                if bReFit:
                    break
                
                for j in xrange(i, self.nCluster):
                    if i == j:
                        co_pos_mat[i,j] = 1000000 # to avoid minimum check
                        continue
                    co_pos_mat[i,j] = co_pos_mat[j,i] = np.linalg.norm(self.ml.cluster_centers_[i] - self.ml.cluster_centers_[j])
                                        
                    if co_pos_mat[i,j] < self.fMinDist:
                        bReFit = True
                        break
                        
            if bReFit:
                self.nCluster -= 1
                print "New # of clusters: ", self.nCluster
                continue
            else:
                break
            
        # Final fit with the accepted number of clusters
        raw_pos_index = self.ml.fit_predict(raw_pos)
        return raw_pos_index

    # Return a list of clustered index.
    def grouping(self, raw_data):
        print 'Start clustering.'
        print raw_data.shape

        #-----------------------------------------------------------#
        ## Initialization
        raw_pos, raw_quat = self.mat_to_pos_quat(raw_data)
        
        #-----------------------------------------------------------#
        ## K-means Clustering by Position
        raw_pos_index = self.pos_clustering(raw_pos)
        
        return raw_pos_index
        
    def clustering(self, raw_data):
        print 'Start clustering.'
        print raw_data.shape

        #-----------------------------------------------------------#
        ## Initialization
        raw_pos, raw_quat = self.mat_to_pos_quat(raw_data)

        #-----------------------------------------------------------#
#......... part of the code omitted here .........
Author: gt-ros-pkg, Project: hrl-assistive, Lines: 103, Source: data_clustering.py

Example 6: _sample

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]
# (numpy as np, check_random_state and Counter are assumed imported)
    def _sample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : ndarray, shape (n_samples, )
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : ndarray, shape (n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new)
            The corresponding label of `X_resampled`

        """
        random_state = check_random_state(self.random_state)

        # Compute the number of clusters needed
        if self.ratio == 'auto':
            num_samples = self.stats_c_[self.min_c_]
        else:
            num_samples = int(self.stats_c_[self.min_c_] / self.ratio)

        # Create the clustering object
        kmeans = KMeans(n_clusters=num_samples, random_state=random_state)
        kmeans.set_params(**self.kwargs)

        # Start with the minority class
        X_min = X[y == self.min_c_]
        y_min = y[y == self.min_c_]

        # All the minority class samples will be preserved
        X_resampled = X_min.copy()
        y_resampled = y_min.copy()

        # Loop over the other classes and under-sample each one
        for key in self.stats_c_.keys():

            # Skip the minority class itself
            if key == self.min_c_:
                continue

            # Find the centroids via k-means
            kmeans.fit(X[y == key])
            centroids = kmeans.cluster_centers_

            # Concatenate to the minority class
            X_resampled = np.concatenate((X_resampled, centroids), axis=0)
            y_resampled = np.concatenate((y_resampled, np.array([key] *
                                                                num_samples)),
                                         axis=0)

        self.logger.info('Under-sampling performed: %s', Counter(
            y_resampled))

        return X_resampled, y_resampled
Author: dvro, Project: imbalanced-learn, Lines: 63, Source: cluster_centroids.py

Example 7: ClusterCentroids

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]

#......... part of the code omitted here .........

    """

    def __init__(self,
                 ratio='auto',
                 random_state=None,
                 estimator=None,
                 voting='auto',
                 n_jobs=1):
        super(ClusterCentroids, self).__init__(
            ratio=ratio)
        self.random_state = random_state
        self.estimator = estimator
        self.voting = voting
        self.n_jobs = n_jobs

    def _validate_estimator(self):
        """Private function to create the KMeans estimator"""
        if self.estimator is None:
            self.estimator_ = KMeans(
                random_state=self.random_state, n_jobs=self.n_jobs)
        elif isinstance(self.estimator, KMeans):
            self.estimator_ = self.estimator
        else:
            raise ValueError('`estimator` has to be a KMeans clustering.'
                             ' Got {} instead.'.format(type(self.estimator)))

    def _generate_sample(self, X, y, centroids, target_class):
        if self.voting_ == 'hard':
            nearest_neighbors = NearestNeighbors(n_neighbors=1)
            nearest_neighbors.fit(X, y)
            indices = nearest_neighbors.kneighbors(centroids,
                                                   return_distance=False)
            X_new = safe_indexing(X, np.squeeze(indices))
        else:
            if sparse.issparse(X):
                X_new = sparse.csr_matrix(centroids)
            else:
                X_new = centroids
        y_new = np.array([target_class] * centroids.shape[0])

        return X_new, y_new

    def _sample(self, X, y):
        """Resample the dataset.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        X_resampled : {ndarray, sparse matrix}, shape \
(n_samples_new, n_features)
            The array containing the resampled data.

        y_resampled : ndarray, shape (n_samples_new,)
            The corresponding label of `X_resampled`

        """
        self._validate_estimator()

        if self.voting == 'auto':
            if sparse.issparse(X):
                self.voting_ = 'hard'
            else:
                self.voting_ = 'soft'
        else:
            if self.voting in VOTING_KIND:
                self.voting_ = self.voting
            else:
                raise ValueError("'voting' needs to be one of {}. Got {}"
                                 " instead.".format(VOTING_KIND, self.voting))

        X_resampled, y_resampled = [], []
        for target_class in np.unique(y):
            if target_class in self.ratio_.keys():
                n_samples = self.ratio_[target_class]
                self.estimator_.set_params(**{'n_clusters': n_samples})
                self.estimator_.fit(X[y == target_class])
                X_new, y_new = self._generate_sample(
                    X, y, self.estimator_.cluster_centers_, target_class)
                X_resampled.append(X_new)
                y_resampled.append(y_new)
            else:
                target_class_indices = np.flatnonzero(y == target_class)
                X_resampled.append(safe_indexing(X, target_class_indices))
                y_resampled.append(safe_indexing(y, target_class_indices))

        if sparse.issparse(X):
            X_resampled = sparse.vstack(X_resampled)
        else:
            X_resampled = np.vstack(X_resampled)
        y_resampled = np.hstack(y_resampled)

        return X_resampled, np.array(y_resampled)
Author: glemaitre, Project: imbalanced-learn, Lines: 104, Source: cluster_centroids.py

Example 8: ClusterCentroids

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]

#......... part of the code omitted here .........
    Supports multi-class resampling by sampling each class independently.

    Examples
    --------

    >>> from collections import Counter
    >>> from sklearn.datasets import make_classification
    >>> from imblearn.under_sampling import \
ClusterCentroids # doctest: +NORMALIZE_WHITESPACE
    >>> X, y = make_classification(n_classes=2, class_sep=2,
    ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
    >>> print('Original dataset shape %s' % Counter(y))
    Original dataset shape Counter({1: 900, 0: 100})
    >>> cc = ClusterCentroids(random_state=42)
    >>> X_res, y_res = cc.fit_resample(X, y)
    >>> print('Resampled dataset shape %s' % Counter(y_res))
    ... # doctest: +ELLIPSIS
    Resampled dataset shape Counter({...})

    """

    def __init__(self,
                 sampling_strategy='auto',
                 random_state=None,
                 estimator=None,
                 voting='auto',
                 n_jobs=1,
                 ratio=None):
        super(ClusterCentroids, self).__init__(
            sampling_strategy=sampling_strategy, ratio=ratio)
        self.random_state = random_state
        self.estimator = estimator
        self.voting = voting
        self.n_jobs = n_jobs

    def _validate_estimator(self):
        """Private function to create the KMeans estimator"""
        if self.estimator is None:
            self.estimator_ = KMeans(
                random_state=self.random_state, n_jobs=self.n_jobs)
        elif isinstance(self.estimator, KMeans):
            self.estimator_ = clone(self.estimator)
        else:
            raise ValueError('`estimator` has to be a KMeans clustering.'
                             ' Got {} instead.'.format(type(self.estimator)))

    def _generate_sample(self, X, y, centroids, target_class):
        if self.voting_ == 'hard':
            nearest_neighbors = NearestNeighbors(n_neighbors=1)
            nearest_neighbors.fit(X, y)
            indices = nearest_neighbors.kneighbors(
                centroids, return_distance=False)
            X_new = safe_indexing(X, np.squeeze(indices))
        else:
            if sparse.issparse(X):
                X_new = sparse.csr_matrix(centroids, dtype=X.dtype)
            else:
                X_new = centroids
        y_new = np.array([target_class] * centroids.shape[0], dtype=y.dtype)

        return X_new, y_new

    def _fit_resample(self, X, y):
        self._validate_estimator()

        if self.voting == 'auto':
            if sparse.issparse(X):
                self.voting_ = 'hard'
            else:
                self.voting_ = 'soft'
        else:
            if self.voting in VOTING_KIND:
                self.voting_ = self.voting
            else:
                raise ValueError("'voting' needs to be one of {}. Got {}"
                                 " instead.".format(VOTING_KIND, self.voting))

        X_resampled, y_resampled = [], []
        for target_class in np.unique(y):
            if target_class in self.sampling_strategy_.keys():
                n_samples = self.sampling_strategy_[target_class]
                self.estimator_.set_params(**{'n_clusters': n_samples})
                self.estimator_.fit(X[y == target_class])
                X_new, y_new = self._generate_sample(
                    X, y, self.estimator_.cluster_centers_, target_class)
                X_resampled.append(X_new)
                y_resampled.append(y_new)
            else:
                target_class_indices = np.flatnonzero(y == target_class)
                X_resampled.append(safe_indexing(X, target_class_indices))
                y_resampled.append(safe_indexing(y, target_class_indices))

        if sparse.issparse(X):
            X_resampled = sparse.vstack(X_resampled)
        else:
            X_resampled = np.vstack(X_resampled)
        y_resampled = np.hstack(y_resampled)

        return X_resampled, np.array(y_resampled, dtype=y.dtype)
Author: chkoar, Project: imbalanced-learn, Lines: 104, Source: _cluster_centroids.py
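A hedged usage sketch for the class above: passing a pre-configured KMeans makes _validate_estimator clone it, after which _fit_resample calls set_params(n_clusters=...) on the clone once per class. The constructor arguments and fit_resample follow the excerpt's own docstring; whether your installed imbalanced-learn version matches this API is an assumption.

from sklearn.cluster import KMeans
from sklearn.datasets import make_classification
from imblearn.under_sampling import ClusterCentroids

X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                           n_samples=1000, random_state=10)
# n_clusters is left alone here: the sampler overrides it per class.
cc = ClusterCentroids(estimator=KMeans(n_init=5), voting='soft',
                      random_state=42)
X_res, y_res = cc.fit_resample(X, y)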

Example 9: KMeans

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]

#......... part of the code omitted here .........
            copy.deepcopy(self.model_.cluster_centers_)

    def _reset(self):
        """Resets all attributes (erases the model)"""
        self.model_ = None
        self.n_clusters_ = None
        self.sample_labels_ = None
        self.sample_distances_ = None

    def fit(self, X, K, sample_labels=None, estimator_params=None):
        """Fits a Sklearn KMeans model to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        K : int
            The number of clusters.

        sample_labels : array-like, shape (n_samples), optional
            Labels for each of the samples in X.

        estimator_params : dict, optional
            The parameters to pass to the KMeans estimator.


        Returns
        -------
        self
        """
        self._reset()
        # Note: previously set n_init=50
        self.model_ = SklearnKMeans(K)
        if estimator_params is not None:
            assert isinstance(estimator_params, dict)
            self.model_.set_params(**estimator_params)

        # Compute Kmeans model
        self.model_.fit(X)
        if sample_labels is None:
            sample_labels = ["sample_{}".format(i) for i in range(X.shape[0])]
        assert len(sample_labels) == X.shape[0]
        self.sample_labels_ = np.array(sample_labels)
        self.n_clusters_ = K

        # Record sample label/distance from its cluster center
        self.sample_distances_ = OrderedDict()
        for cluster_label in range(self.n_clusters_):
            assert cluster_label not in self.sample_distances_
            member_rows = X[self.cluster_labels_ == cluster_label, :]
            member_labels = self.sample_labels_[self.cluster_labels_ == cluster_label]
            centroid = np.expand_dims(self.cluster_centers_[cluster_label], axis=0)

            # "All clusters must have at least 1 member!"
            if member_rows.shape[0] == 0:
                return None

            # Calculate distance between each member row and the current cluster
            dists = np.empty(member_rows.shape[0])
            dist_labels = []
            for j, (row, label) in enumerate(zip(member_rows, member_labels)):
                dists[j] = cdist(np.expand_dims(row, axis=0), centroid, "euclidean").squeeze()
                dist_labels.append(label)

            # Sort the distances/labels in ascending order
            sort_order = np.argsort(dists)
            dists = dists[sort_order]
            dist_labels = np.array(dist_labels)[sort_order]
            self.sample_distances_[cluster_label] = {
                "sample_labels": dist_labels,
                "distances": dists,
            }
        return self

    def get_closest_samples(self):
        """Returns a list of the labels of the samples that are located closest
           to their cluster's center.


        Returns
        ----------
        closest_samples : list
                  A list of the sample labels that are located the closest to
                  their cluster's center.
        """
        if self.sample_distances_ is None:
            raise Exception("No model has been fit yet!")

        return [samples['sample_labels'][0] for samples in list(self.sample_distances_.values())]

    def get_memberships(self):
        '''
        Return the memberships in each cluster
        '''
        memberships = OrderedDict()
        for cluster_label, samples in list(self.sample_distances_.items()):
            memberships[cluster_label] = OrderedDict(
                [(l, d) for l, d in zip(samples["sample_labels"], samples["distances"])])
        return json.dumps(memberships, indent=4)
Author: FullStackHan, Project: ottertune, Lines: 104, Source: cluster.py
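For orientation, a hypothetical driver for this wrapper is sketched below. The class name is elided in the excerpt above, so KMeansClusters is this sketch's guess and not from the source; the fit signature and estimator_params dict mirror the code shown.

import numpy as np

X = np.random.RandomState(0).rand(20, 5)  # toy data
model = KMeansClusters()                  # hypothetical class name
model = model.fit(X, K=3, estimator_params={'n_init': 50})
if model is not None:  # fit returns None when a cluster ends up empty
    print(model.get_closest_samples())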

Example 10: CodeBook

# Module required: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import set_params [as alias]

#......... part of the code omitted here .........
        # we should refactor the input validation.
        # 
        # X = self._check_fit_data(X)
        # return self.fit(X)._transform(X)
        raise NotImplementedError

    def transform(self, X, y=None):
        """Transform X to a cluster-distance space.

        In the new space, each dimension is the distance to the cluster
        centers.  Note that even if X is sparse, the array returned by
        `transform` will typically be dense.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to transform.

        Returns
        -------
        X_new : array, shape [n_samples, k]
            X transformed in the new space.
        """
        # check_is_fitted(self, 'cook_book_')

        # X = self._check_test_data(X)
        # return self._transform(X)
        raise NotImplementedError

    def _transform(self, X):
        """guts of transform method; no input validation"""
        # return euclidean_distances(X, self.cook_book_)
        raise NotImplementedError


    def predict(self, X):
        """Predicts the index value of the closest word within the code book.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to predict.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the closest word within the code book.
        """
        return self.cluster_core.predict(X)

    def get_dictionary(self):
        """Retrieves the words forming the code book

        Returns
        -------
        dictionary : array, shape [n_words, n_features]
            Code book elements (words of the dictionary) represented
            in the feature space
        """
        # TODO: check that the codebook is fitted
        return self.cluster_core.cluster_centers_

    def get_BoF_descriptor(self, X):

        # norm = lambda x: x.astype(float)/np.linalg.norm(x)
        # return norm(np.bincount(self.predict(X)))
        return np.histogram(self.predict(X),
                            bins=range(self.n_words+1),
                            density=True)

    def get_BoF_pramide_descriptor(self, X):
        """Split the image (or volume) in a pyramid manner and get
        a descriptor for each level (and part). Concatenate the output.
        TODO: write proper documentation.

        """
        def split_data_by2(X):
            # TODO: rewrite this in a nicer manner that uses len(X.shape)
            # TODO: this can raise an error if a dimension of X is odd
            # Halve the first three axes, yielding 8 sub-blocks.
            parts = [p
                     for h in np.split(X, 2, axis=0)
                     for q in np.split(h, 2, axis=1)
                     for p in np.split(q, 2, axis=2)]
            return parts

        def get_occurrences(X):
            return np.histogram(X, bins=range(self.n_words+1))

        def build_piramide(X, level=2):
            if level == 0:
                return get_occurrences(X)
            else:
                return [get_occurrences(X)] + [build_piramide(Xpart, level - 1)
                                               for Xpart in split_data_by2(X)]

        return build_piramide(self.predict(X))

    def get_params(self, deep=True):
        return self.cluster_core.get_params()

    def set_params(self, **params):
        self.cluster_core.set_params(**params)
        return self  # sklearn convention: set_params returns self
Author: glemaitre, Project: protoclass, Lines: 104, Source: codebook.py
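Stripped of the CodeBook wrapper, the bag-of-features descriptor in get_BoF_descriptor reduces to a normalized histogram of predicted word indices. A minimal standalone sketch of the same idea (all names illustrative):

import numpy as np
from sklearn.cluster import KMeans

n_words = 16
features = np.random.RandomState(0).rand(500, 8)  # toy local descriptors
codebook = KMeans(n_clusters=n_words).fit(features)

# Normalized histogram of word assignments = the BoF descriptor.
bof, _ = np.histogram(codebook.predict(features),
                      bins=range(n_words + 1), density=True)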


Note: the sklearn.cluster.KMeans.set_params method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various programmers; copyright remains with the original authors, and distribution and use must follow the corresponding project's License. Do not reproduce without permission.