

Python KMeans.cluster_centers_ attribute: code examples

This article collects typical usage examples of sklearn.cluster.KMeans.cluster_centers_ in Python. (Strictly speaking, cluster_centers_ is a fitted attribute of KMeans rather than a method.) If you are wondering what KMeans.cluster_centers_ does, how to use it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples of sklearn.cluster.KMeans, the class this attribute belongs to.


The following presents 14 code examples of KMeans.cluster_centers_, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
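All 14 examples revolve around the same trick: assigning precomputed centroids directly to cluster_centers_ so that a KMeans instance can be used for nearest-centroid prediction without (re-)fitting. A minimal sketch of the pattern, assuming an older scikit-learn release like the ones these examples target (newer releases may require additional fitted attributes before predict() will run):

import numpy as np
from sklearn.cluster import KMeans

centroids = np.array([[0.0, 0.0], [5.0, 5.0]])  # precomputed cluster centers
km = KMeans(n_clusters=len(centroids))
km.cluster_centers_ = centroids                 # inject centers; no call to fit()
labels = km.predict(np.array([[0.2, 0.1], [4.8, 5.2]]))
print(labels)  # [0 1]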

Example 1: sample_points

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
    def sample_points(self):
        k = KMeans(n_clusters=self.no_clusters)
        k.cluster_centers_ = np.array(self.cluster_center_points)
        assigned_clusters = k.predict(np.array(self.data))

        self.cluster_centers = [ClusterCenter(c) for c in self.cluster_center_points]
        self.data_points = [DataPoint(self.data[i], self.cluster_centers[assigned_clusters[i]]) for i in
                            range(len(self.data))]

        dp_sum = np.sum([dp.calc_sampling_weight() for dp in self.data_points]) / self.out_per_mapper

        for dp in self.data_points:
            dp.dp_sum = dp_sum

        #logging.warn("Tot!")
        #logging.warn(sum([dp.calc_sampling_probability() for dp in self.data_points]))
        #logging.error(len(self.data_points))

        while self.can_write_more_features():
            np.random.shuffle(self.data_points)
            for dp in self.data_points:
                if not self.can_write_more_features():
                    return

                dp.dp_sum = dp_sum
                if np.random.sample() < dp.calc_sampling_probability():
                    self.write_feature(dp.point, dp.calc_weight(self.out_per_mapper))
Author: lukaselmer, Project: ethz-data-mining, Lines: 29, Source: mapper.py

Example 2: create_codebook

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
    def create_codebook(self, features, _class='label'):

        if self.debug:
            print('\t- creating visual codebook for {0} ...'.format(_class))
            print('\t- features.shape', features.shape)
            sys.stdout.flush()

        n_feats, n_cuboids, cuboid_depth = features.shape
        features = features.reshape(-1, cuboid_depth)

        if self.codebook_selection == self.cs_dict["kmeans"]:

            codebook = KMeans(init='k-means++', n_clusters=self.codebook_size, n_init=50,
                              tol=1e-10, max_iter=1000, random_state=self.seed, n_jobs=self.n_jobs)

            codebook.fit(features)

            return codebook

        else:

            codebook = KMeans(init='random', n_clusters=self.codebook_size, n_init=1,
                              tol=1e-10, max_iter=1, random_state=self.seed, n_jobs=self.n_jobs)

            # _init_centroids is a private sklearn helper; older releases exposed it as
            # `from sklearn.cluster.k_means_ import _init_centroids`.
            codebook.cluster_centers_ = _init_centroids(features, k=self.codebook_size, init='random', random_state=self.seed)

            return codebook
Author: allansp84, Project: spectralcubes, Lines: 29, Source: midlevelfeatures.py

Example 3: run

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def run():
    cluster_centers = load_prediction()
    test_data = load_test_data()
    k = KMeans(n_clusters=200)
    k.cluster_centers_ = cluster_centers
    score = k.score(test_data)
    print("Score: %f" % (score / len(test_data) * -1))
Author: lukaselmer, Project: ethz-data-mining, Lines: 9, Source: evaluate.py

Example 4: get_vlad_feat

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
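# Assumed imports from the source project (not shown in this excerpt):
#   import numpy as npy
#   import dsift                   # project-local dense-SIFT extractor
#   from scipy.misc import imread  # or any equivalent image reader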
def get_vlad_feat(img_list,grid_spacing,patch_size,bow_model):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)    
    num_words,dim_feat=bow_model.shape
    dim_vlad=num_words*dim_feat
    vlad_feat=npy.zeros((len(img_list),dim_vlad),dtype=npy.float32)
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)  # positional: n_clusters, init, n_init, max_iter, tol (old signature)
    obj_kmeans.cluster_centers_=bow_model
    eps_float32=npy.finfo(npy.float32).eps
    for kk in range(len(img_list)):
        print("Extracting VLAD feature,"+str(kk)+"/"+str(len(img_list)))
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat,pos_feat=raw_feat_extractor.process_image(img,False,False)
        label_feat=obj_kmeans.predict(raw_feat)
        vlad_feat_kk=npy.zeros(dim_vlad,dtype=npy.float32)
        for ii in range(label_feat.shape[0]):
            label_ii=label_feat[ii]
            res_ii=raw_feat[ii,:]-bow_model[label_ii,:]
            res_ii_norm=npy.sqrt(npy.sum(res_ii*res_ii))
            res_ii=res_ii/(res_ii_norm+eps_float32)
            res_ii=res_ii+vlad_feat_kk[label_ii*dim_feat:(label_ii+1)*dim_feat]
            vlad_feat_kk[label_ii*dim_feat:(label_ii+1)*dim_feat]=res_ii
        vlad_feat_kk_ssr=npy.sqrt(npy.abs(vlad_feat_kk))
        idx_temp=vlad_feat_kk>0
        vlad_feat_kk[idx_temp]=vlad_feat_kk_ssr[idx_temp]
        idx_temp=npy.logical_not(idx_temp)
        vlad_feat_kk[idx_temp]=-vlad_feat_kk_ssr[idx_temp]
        vlad_feat[kk,:]=vlad_feat_kk/(npy.sqrt(npy.sum(vlad_feat_kk*vlad_feat_kk)+eps_float32))
    return vlad_feat
Author: galad-loth, Project: ImageClassification, Lines: 32, Source: feat_coding.py

Example 5: remove_half_nearest_points

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
    def remove_half_nearest_points(self, center_points, data):
        k = KMeans(n_clusters=self.no_clusters)
        k.cluster_centers_ = np.array(center_points)
        assigned_clusters = k.predict(np.array(data))
        clusters = [ClusterCenter(c) for c in center_points]
        for i in range(0, len(assigned_clusters)):
            clusters[assigned_clusters[i]].add_point(data[i])

        ret = []
        for c in clusters:
            ret += c.get_half_farthest_points()
        return ret
Author: lukaselmer, Project: ethz-data-mining, Lines: 14, Source: mapper.py

Example 6: get_bow_feat

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def get_bow_feat(img_list,grid_spacing,patch_size,bow_model):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)
    num_words=bow_model.shape[0]
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)
    obj_kmeans.cluster_centers_=bow_model
    bow_feat=npy.zeros((len(img_list),num_words),dtype=npy.float32)
    for kk in range(len(img_list)):
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat=raw_feat_extractor.process_image(img,False,False)[0]
        label_feat=obj_kmeans.predict(raw_feat)
        bow_feat[kk,:]=get_hist(label_feat,npy.array([0,num_words-1]),num_words,True)
    return bow_feat
Author: galad-loth, Project: ImageClassification, Lines: 16, Source: feat_coding.py

Example 7: get_spm_feat

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def get_spm_feat(img_list,grid_spacing,patch_size,bow_model,pyramid_level):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)
    num_words=bow_model.shape[0]
    dim_spm=num_words*(4**(pyramid_level+1)-1)//3  # integer division so the array dimension stays an int under Python 3
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)
    obj_kmeans.cluster_centers_=bow_model
    spm_feat=npy.zeros((len(img_list),dim_spm),dtype=npy.float32)
    for kk in range(len(img_list)):
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat,pos_feat=raw_feat_extractor.process_image(img,False,False)
        label_feat=obj_kmeans.predict(raw_feat)
        spm_feat[kk,:]=get_spm_hist(label_feat,pos_feat,num_words,pyramid_level,img.shape)
    return spm_feat   
Author: galad-loth, Project: ImageClassification, Lines: 17, Source: feat_coding.py

Example 8: extract_features

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def extract_features(data):
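    # vCenters is assumed to be a module-level array of precomputed
    # visual-word centers (the codebook) defined elsewhere in the source file.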
    kmeans = KMeans()
    kmeans.cluster_centers_ = vCenters

    bovw = []
    for idx, image in enumerate(data):
        image_feature_descriptors = extract_HOG_descriptors_per_image(image)
        Y = kmeans.predict(image_feature_descriptors.T)

        vFeatures = np.zeros(vCenters.shape[0])
        for vfeature in Y:
            vFeatures[vfeature] += 1
        bovw.append(vFeatures)


    return np.asarray(bovw)
Author: fred1234, Project: BigData, Lines: 18, Source: online_svm_bovw.py

Example 9: readBespokeFile

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def readBespokeFile(infile):
    """Returns a Model namedtuple with all the model parts"""
    with open(infile, 'r') as modelfile:
        lines = iter(modelfile.read().splitlines())
    n_params = int(next(lines))
    metric_names = [next(lines) for i in range(n_params)]
    means = _stringToArray(next(lines))
    stdevs = _stringToArray(next(lines))
    rotation_matrix = _stringToArray(next(lines))
    models = []
    centroids = []
    try:
        while True:
            name = next(lines)  # skip the model-name line
            centroids.append(_stringToArray(next(lines)))
            weights = _stringToArray(next(lines))
            functions = [LinearRegression.stringToFunction(next(lines))
                         for i in range(weights.shape[0])]
            models.append(LinearRegression.Model(functions, weights))
    except StopIteration:
        pass
    kmeans = KMeans(len(centroids))
    kmeans.cluster_centers_ = np.array(centroids)
    return Model(metric_names, means, stdevs, rotation_matrix, kmeans, models)
Author: gtcasl, Project: eiger, Lines: 26, Source: Eiger.py

Example 10: run

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]

#......... part of the code omitted here .........
            json_root["std_devs"] = [stdev for stdev in stdevs.tolist()]
            json_root["rotation_matrix"] = [[elem for elem in row] for row in rotation_matrix.tolist()]
            json_root["clusters"] = []

            for i in range(n_clusters):
                cluster_profile = rotated_training_profile[clusters==i,:]
                cluster_performance = training_performance[clusters==i]
                regression = LinearRegression.LinearRegression(cluster_profile,
                                                               cluster_performance)
                pool = [LinearRegression.identityFunction()]
                for col in range(cluster_profile.shape[1]):
                    if('inv_quadratic' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, -2))
                    if('inv_linear' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, -1))
                    if('inv_sqrt' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, -.5))
                    if('sqrt' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, .5))
                    if('linear' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, 1))
                    if('quadratic' in args['regressor_functions']):
                        pool.append(LinearRegression.powerFunction(col, 2))
                    if('log' in args['regressor_functions']):
                        pool.append(LinearRegression.logFunction(col))
                    if('cross' in args['regressor_functions']):
                        for xcol in range(col, cluster_profile.shape[1]):
                            pool.append(LinearRegression.crossFunction(col, xcol))
                    if('div' in args['regressor_functions']):
                        for xcol in range(col, cluster_profile.shape[1]):
                            pool.append(LinearRegression.divFunction(col,xcol))
                            pool.append(LinearRegression.divFunction(xcol,col))
                (models[i], r_squared, r_squared_adj) = regression.select(pool, 
                        threshold=args['threshold'],
                        folds=args['nfolds'])
                
                # dump model to original file encoding
                modelfile.write('Model %s\n' % i)
                modelfile.write("[%s](%s)\n" % (rotation_matrix.shape[1],
                                                ','.join([str(center) for center in
                                                    kmeans.cluster_centers_[i].tolist()])))
                modelfile.write(repr(models[i]))
                modelfile.write('\n') # need a trailing newline

                # dump model for json encoding
                json_cluster = {}
                json_cluster["center"] = [center for center in kmeans.cluster_centers_[i].tolist()]
                # get models in json format
                json_cluster["regressors"] = models[i].toJSONObject()
                json_root["clusters"].append(json_cluster)

                print "Index\tMetric Name"
                print '\n'.join("%s\t%s" % metric for metric in enumerate(metric_names))
                print "PCA matrix:"
                print rotation_matrix 
                print "Model:\n" + str(models[i])

                print "Finished modeling cluster %s:" % (i,)
                print "r squared = %s" % (r_squared,)
                print "adjusted r squared = %s" % (r_squared_adj,)
           
        # if we want to save the model file, copy it now
        if args['output'] == True:
            if args['json'] == True:
                with open(training_DC.name + '.model', 'w') as outfile:
                    json.dump(json_root, outfile, indent=4)
            else:
                shutil.copy(modelfile.name, training_DC.name + '.model')
    else:
        lines = iter(open(args['input'],'r').read().splitlines())
        n_params = int(next(lines))
        metric_names = [next(lines) for i in range(n_params)]
        means = _stringToArray(next(lines))
        stdevs = _stringToArray(next(lines))
        rotation_matrix = _stringToArray(next(lines))
        models = []
        centroids = []
        try:
            while True:
                name = next(lines)  # skip the model-name line
                centroids.append(_stringToArray(next(lines)))
                weights = _stringToArray(next(lines))
                functions = [LinearRegression.stringToFunction(next(lines))
                             for i in range(weights.shape[0])]
                models.append(LinearRegression.Model(functions, weights))
        except StopIteration:
            pass
        kmeans = KMeans(len(centroids))
        kmeans.cluster_centers_ = np.array(centroids)

    if(args['experiment_datacollection'] or args['test_fit']):
        DC = args['experiment_datacollection'] if \
            args['experiment_datacollection'] else args['training_datacollection']
        print "Running experiment on data collection %s..." % \
              (DC,)
        experiment_DC = database.DataCollection(DC, 
                                                args['db'])
        _runExperiment(kmeans, means, stdevs, models, rotation_matrix,
                       experiment_DC, args, metric_names)
    print "Done!"
Author: hoangt, Project: eiger, Lines: 104, Source: Eiger.py

Example 11: _train

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]

#......... part of the code omitted here .........
                                       seed=self._settings['seed'])


        ag.info('Extracting patches 2')
        # Filter
        th = self._settings['std_thresh']
        if th > 0:
            gen = (x for x in gen if x.std() >= th)

        rs = np.random.RandomState(0)

        # Now request the patches and convert them to floats
        #patches = np.asarray(list(itr.islice(gen, n_samples)), dtype=np.float64) / 255
        patches = np.asarray(list(itr.islice(gen, n_samples)))
        ag.info('Extracting patches 3')

        from vzlog.default import vz

        # Flatten the patches
        pp = patches.reshape((patches.shape[0], -1))

        C = X.shape[-1]
        sh = (-1,) + self._part_shape + (C,)

        if C <= 3:
            def plot(title):
                vz.section(title)
                grid = ag.plot.ColorImageGrid(pp[:1000].reshape(sh), rows=15)
                grid.save(vz.impath(), scale=3)
        else:
            def plot(title): return

        plot('Original patches')

        # Standardize the patches
        if self._settings['standardize']:
            pp = self._standardize_patches(pp)

        plot('Standardized patches')

        # Determine whitening coefficients
        sigma = np.dot(pp.T, pp) / len(pp)

        self._extra['sigma'] = sigma

        if self._settings['whiten']:
            U, S, _ = np.linalg.svd(sigma)

            shrinker = np.diag(1 / np.sqrt(S + self.w_epsilon))

            #self._whitening_matrix = U @ shrinker @ U.T
            self._whitening_matrix = np.dot(U, np.dot(shrinker, U.T))

            # Now whiten the training patches
            pp = self.whiten_patches(pp)
        else:
            self._whitening_matrix = None

        plot('Whitened patches')

        if self._settings['random_centroids']:
            rs = np.random.RandomState(self._settings['seed'])
            sh = (self._num_parts,) + self._part_shape
            self._parts = rs.normal(0, 1, size=sh)
            #self._parts /= ag.apply_once(np.mean, self._parts, [1, 2])
            return
        else:
            # Run K-means
            from sklearn.cluster import KMeans, MiniBatchKMeans

            #cl = MiniBatchKMeans(
            cl = KMeans(
                        n_clusters=self._num_parts,
                        n_init=self._settings['n_init'],
                        max_iter=self._settings['max_iter'],
                        random_state=self._settings['seed'],
                        #batch_size=50000,
                        n_jobs=self._settings['n_jobs'],
                        )

            ag.info('Training', self._num_parts, 'K-means parts')
            cl.fit(pp)
            ag.info('Done.')

            counts = np.bincount(cl.labels_, minlength=self._num_parts)
            ww = counts / counts.sum()
            HH = np.sum(-ww * np.log(ww))
            print('entropy', HH)

            II = np.argsort(counts)[::-1]
            cl.cluster_centers_ = cl.cluster_centers_[II]
            counts = counts[II]

            ag.info('counts', counts)

            self._parts = cl.cluster_centers_.reshape((-1,) + patches.shape[1:])

            vz.section('Parts')

        self._preprocess()
Author: amitgroup, Project: parts-net, Lines: 104, Source: kmeans_parts_layer.py

Example 12: KMeans

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 20:26:56 2015

@author: felix
"""

from sklearn.cluster import KMeans
import numpy as np


C = np.array([[1, 1], [1, 2], [2, 1], [2, 2], [5, 1], [6, 1], [5, 2]])
centers = [[3, 0], [5, 0]]

clf = KMeans(init='k-means++', n_clusters=2, n_init=5)
clf.cluster_centers_ = centers  # note: fit() below re-runs k-means++ and overwrites this assignment

clf.fit(C)

print("centers: ", clf.cluster_centers_)
Author: rfelixmg, Project: sample_simple, Lines: 22, Source: cluster_sample.py
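A caveat on Example 12: because fit() re-runs the k-means++ initialization, the manual cluster_centers_ assignment above is simply overwritten and has no effect on the result. To genuinely seed KMeans with chosen starting centers, pass them via init. A minimal sketch, assuming a scikit-learn version that accepts an ndarray for init:

import numpy as np
from sklearn.cluster import KMeans

C = np.array([[1, 1], [1, 2], [2, 1], [2, 2], [5, 1], [6, 1], [5, 2]])
centers = np.array([[3, 0], [5, 0]])

clf = KMeans(n_clusters=2, init=centers, n_init=1)  # optimization starts from the given centers
clf.fit(C)
print("centers:", clf.cluster_centers_)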

Example 13: KMeans

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
        # run doughnut and regular k-means cluster alg and store metrics
        clus = KMeans(n_clusters=k-1)
        clus_reg = KMeans(n_clusters=k)

        #   run  lloyds alg on regular and doughnuted data. Uses KMeans++ 
        #   method: max centroid distance.
        clus.fit(data[clustered])
        clus_reg.fit(data)

        #------------ Deal with Labels
        # Method 1: need to classify the held out according to closest centroids
        held_labels = []
        
         # append the centroid of heldout points
        centroid = np.mean(data[heldout],axis=0)
        clus.cluster_centers_=np.append(clus.cluster_centers_,[centroid],axis=0)
        
        # assign to cluster with closest centroid
        for h in heldout:
            held_labels.append(np.linalg.norm(np.subtract(data[h], clus.cluster_centers_),axis=1).argmin())
                
        # assign the heldouts according to held_labels to stitch labels back together
        stitched_label= np.zeros(len(data), dtype=np.int)
        for b in range(len(heldout)):   
            stitched_label[heldout[b]]=held_labels[b]
           
        for b in range(len(clustered)):
            stitched_label[clustered[b]]=clus.labels_[b]
        #------------ at this point, the labels from our doughnut method are stored in stitched_label
        
        # retrieve the prop of clusters and rsq (ratio between/within var)
Author: ljstrnadiii, Project: Clustering, Lines: 33, Source: analysis.py

Example 14: main

# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
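# Assumed imports from the source file (not shown in this excerpt):
#   import datetime
#   import subprocess
#   import numpy as np
#   from sklearn.cluster import KMeans
#   import gbq_large  # project-specific helper for large BigQuery reads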
def main():
	start_date = datetime.datetime.now()
	search_date = start_date + datetime.timedelta(-30) 
	week1_query ='''SELECT T1.uid_i as uid_i,ave as ave_f, special_crystal as special_crystal_f, 
	    pve_consumable as pve_consumable_f, upgrade as upgrade_f, premium_hero as premium_hero_f, n_transactions_i,age_i

	FROM


	(SELECT uid_i, s_ave/total as ave, s_special_crystal/total as special_crystal, s_pve_consumable/total as pve_consumable, 
	s_upgrade/total as upgrade, s_premium_hero/total as premium_hero, n_transactions_i
	FROM

	(SELECT uid_i, SUM(ave) as s_ave, SUM(special_crystal) as s_special_crystal, sum(pve_consumable) as s_pve_consumable, sum(upgrade) as s_upgrade,
	sum(premium_hero) as s_premium_hero,
	(SUM(ave) + SUM(special_crystal) +sum(pve_consumable)+sum(upgrade)+sum(premium_hero)) as total, COUNT(*) as n_transactions_i
	FROM

	(SELECT uid_i, data_reason_desc_s,data_reason_pricing_id_s, 
	(case when left(data_reason_pricing_id_s,4) ='ave_' then data_item_q_i else 0 end) as ave,

	(case when data_reason_pricing_id_s LIKE('%crystal%') and data_reason_pricing_id_s not LIKE('%golden%') then data_item_q_i 
	when data_reason_pricing_id_s LIKE('%upsale%') then data_item_q_i 
	when data_reason_pricing_id_s like('rocket%') then data_item_q_i
	else 0 end) as special_crystal,

	(case when data_reason_pricing_id_s LIKE('%golden%') then data_item_q_i 
	when data_reason_pricing_id_s LIKE('%upgrade%') then data_item_q_i 
	when data_reason_pricing_id_s LIKE('%regen%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%arena%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%duel%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%key%') then data_item_q_i 
	when data_reason_pricing_id_s is null then data_item_q_i
	else 0 end) upgrade,


	(case when data_reason_pricing_id_s LIKE('health_potion%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('revive%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('team%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%questing_pack%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%booster%') then data_item_q_i
	when data_reason_pricing_id_s LIKE('%pve_refill%') then data_item_q_i
	else 0 end) as pve_consumable,


	(case when data_reason_pricing_id_s LIKE('%premium_hero%') then data_item_q_i else 0 end) premium_hero,


	FROM table_date_range(marvel_production_view.redeemer_transactions,timestamp(\''''+str(search_date)+'''\'),timestamp(\''''+str(start_date)+'''\'))
	where counter_s = 'spend'
	and data_item_n_s ='hc'
	and data_reason_desc_s !='buyGift'
	and data_reason_pricing_id_s !='fte_guaranteed'
	and data_reason_pricing_id_s not LIKE('hero_crystal%')
	and data_reason_pricing_id_s !='alliance_create_cost_b')
	GROUP EACH BY 1)) T1
	JOIN EACH
	(SELECT uid_i, DATEDIFF(timestamp(\''''+str(start_date)+'''\'),time_join_t) as age_i
	FROM marvel_production_view.users

	where time_join_t < timestamp(\''''+str(search_date)+'''\')) T2
	ON T1.uid_i = T2.uid_i

	'''
	print('performing query ...')
	df_dimensions_collapsed_w1 = gbq_large.read_gbq(week1_query,project_id='mcoc-bi',destination_table='datascience_view.clusters_tmp')
	df_dimensions_collapsed_w1=df_dimensions_collapsed_w1.fillna(0)
	df_dimensions = df_dimensions_collapsed_w1[['ave_f','special_crystal_f','pve_consumable_f','upgrade_f','premium_hero_f']]
	est_c = KMeans(n_clusters=10)
	print('clustering ...')
	est_c.cluster_centers_ = np.asarray([[ 0.02694769,  0.06531768,  0.06121219,  0.82539261,  0.02112983],
	       [ 0.05772959,  0.37772436,  0.09730477,  0.40487444,  0.06236684],
	       [ 0.08125626,  0.29389585,  0.42306508,  0.12683245,  0.07495037],
	       [ 0.01135739,  0.08087575,  0.0494629 ,  0.0646581 ,  0.79364585],
	       [ 0.51941725,  0.14303638,  0.15421209,  0.14783146,  0.03550281],
	       [ 0.00832494,  0.91744861,  0.02002415,  0.03100689,  0.02319541],
	       [ 0.06583563,  0.62194053,  0.09732582,  0.12262572,  0.0922723 ],
	       [ 0.08316859,  0.09417081,  0.33420608,  0.44578459,  0.04266993],
	       [ 0.03944744,  0.05819858,  0.79046582,  0.09186975,  0.02001841],
	       [ 0.04018328,  0.35265425,  0.08800917,  0.11709595,  0.40205735]])
	labels_c=est_c.predict(df_dimensions)
	print('post processing ...')
	df_dimensions_collapsed_w1['cluster_label_i'] = labels_c
	df_write = df_dimensions_collapsed_w1
	df_write['ave_f'] = df_write.ave_f.apply(lambda x: np.fabs(x))
	df_write['special_crystal_f'] = df_write.special_crystal_f.apply(lambda x: np.fabs(x))
	df_write['pve_consumable_f'] = df_write.pve_consumable_f.apply(lambda x: np.fabs(x))
	df_write['upgrade_f'] = df_write.upgrade_f.apply(lambda x: np.fabs(x))
	df_write['premium_hero_f'] = df_write.premium_hero_f.apply(lambda x: np.fabs(x))
	df_write['_ts_t'] = start_date.strftime('%Y-%m-%d %H:%M:%S')
	filename_str = 'segmentation.csv'
	table_write = 'mcoc-bi:marvel_bi.user_segmentation_historical'+ start_date.strftime('%Y%m%d')
	print('writing csv ...')
	df_write.to_csv(filename_str,index=False)
	print('bq loading ...')
	subprocess.call("bq load --source_format=CSV --skip_leading_rows=1 "+table_write+ " " + filename_str + " uid_i:integer,ave_f:float,special_crystal_f:float,pve_consumable_f:float,upgrade_f:float,premium_hero_f:float,n_transactions_i:integer,age_i:integer,cluster_label_i:integer,_ts_t:timestamp",shell=True)
Author: mwong-kabam, Project: UserSegmentation, Lines: 98, Source: clustering.py


Note: The sklearn.cluster.KMeans.cluster_centers_ examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.