This article collects typical usage examples of the Python attribute sklearn.cluster.KMeans.cluster_centers_. If you are wondering what KMeans.cluster_centers_ does or how to use it in your own code, the curated code samples below should help. You can also browse further usage examples of the class it belongs to, sklearn.cluster.KMeans.
Below, 14 code examples of KMeans.cluster_centers_ are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
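Most of the examples share one core pattern: build a KMeans estimator, assign a precomputed centroid array to its cluster_centers_ attribute instead of calling fit(), and then use the estimator for prediction. A minimal sketch of that pattern (the centroid values are made up for illustration; note that this leans on scikit-learn internals, and newer releases may require additional attributes normally set by fit() before predict() will work):

import numpy as np
from sklearn.cluster import KMeans

# Hypothetical precomputed centroids (e.g. loaded from disk or another job).
centers = np.array([[0.0, 0.0], [5.0, 5.0]])

km = KMeans(n_clusters=len(centers))
km.cluster_centers_ = centers                    # inject centroids instead of fitting
labels = km.predict(np.array([[0.2, -0.1], [4.8, 5.3]]))
print(labels)                                    # expected: [0 1]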
Example 1: sample_points
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def sample_points(self):
    k = KMeans(n_clusters=self.no_clusters)
    k.cluster_centers_ = np.array(self.cluster_center_points)
    assigned_clusters = k.predict(np.array(self.data))
    self.cluster_centers = [ClusterCenter(c) for c in self.cluster_center_points]
    self.data_points = [DataPoint(self.data[i], self.cluster_centers[assigned_clusters[i]]) for i in
                        range(len(self.data))]
    dp_sum = np.sum([dp.calc_sampling_weight() for dp in self.data_points]) / self.out_per_mapper
    for dp in self.data_points:
        dp.dp_sum = dp_sum
    #logging.warn("Tot!")
    #logging.warn(sum([dp.calc_sampling_probability() for dp in self.data_points]))
    #logging.error(len(self.data_points))
    while self.can_write_more_features():
        np.random.shuffle(self.data_points)
        for dp in self.data_points:
            if not self.can_write_more_features():
                return
            dp.dp_sum = dp_sum
            if np.random.sample() < dp.calc_sampling_probability():
                self.write_feature(dp.point, dp.calc_weight(self.out_per_mapper))
Example 2: create_codebook
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def create_codebook(self, features, _class='label'):
    if self.debug:
        print('\t- creating visual codebook for {0} ...'.format(_class))
        print('\t- features.shape', features.shape)
        sys.stdout.flush()
    n_feats, n_cuboids, cuboid_depth = features.shape
    features = features.reshape(-1, cuboid_depth)
    if self.codebook_selection == self.cs_dict["kmeans"]:
        codebook = KMeans(init='k-means++', n_clusters=self.codebook_size, n_init=50,
                          tol=1e-10, max_iter=1000, random_state=self.seed, n_jobs=self.n_jobs)
        codebook.fit(features)
        return codebook
    else:
        codebook = KMeans(init='random', n_clusters=self.codebook_size, n_init=1,
                          tol=1e-10, max_iter=1, random_state=self.seed, n_jobs=self.n_jobs)
        codebook.cluster_centers_ = _init_centroids(features, k=self.codebook_size, init='random', random_state=self.seed)
        return codebook
Example 3: run
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def run():
    cluster_centers = load_prediction()
    test_data = load_test_data()
    k = KMeans(n_clusters=200)
    k.cluster_centers_ = cluster_centers
    score = k.score(test_data)
    print("Score: %f" % (score / len(test_data) * -1))
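For context on the sign flip in Example 3: KMeans.score returns the negated K-means objective, i.e. minus the sum of squared distances from each sample to its nearest centroid, so -score / len(test_data) is the mean squared distance per sample. A quick sanity check of that relationship on a fitted model (a sketch assuming current scikit-learn semantics for score and inertia_):

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).rand(100, 2)
km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
# score() is the negated objective, so -score on the training data matches inertia_.
assert np.isclose(-km.score(X), km.inertia_)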
Example 4: get_vlad_feat
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def get_vlad_feat(img_list,grid_spacing,patch_size,bow_model):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)
    num_words,dim_feat=bow_model.shape
    dim_vlad=num_words*dim_feat
    vlad_feat=npy.zeros((len(img_list),dim_vlad),dtype=npy.float32)
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)
    obj_kmeans.cluster_centers_=bow_model
    eps_float32=npy.finfo(npy.float32).eps
    for kk in range(len(img_list)):
        print("Extracting VLAD feature,"+str(kk)+"/"+str(len(img_list)))
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat,pos_feat=raw_feat_extractor.process_image(img,False,False)
        label_feat=obj_kmeans.predict(raw_feat)
        vlad_feat_kk=npy.zeros(dim_vlad,dtype=npy.float32)
        for ii in range(label_feat.shape[0]):
            label_ii=label_feat[ii]
            res_ii=raw_feat[ii,:]-bow_model[label_ii,:]
            res_ii_norm=npy.sqrt(npy.sum(res_ii*res_ii))
            res_ii=res_ii/(res_ii_norm+eps_float32)
            res_ii=res_ii+vlad_feat_kk[label_ii*dim_feat:(label_ii+1)*dim_feat]
            vlad_feat_kk[label_ii*dim_feat:(label_ii+1)*dim_feat]=res_ii
        vlad_feat_kk_ssr=npy.sqrt(npy.abs(vlad_feat_kk))
        idx_temp=vlad_feat_kk>0
        vlad_feat_kk[idx_temp]=vlad_feat_kk_ssr[idx_temp]
        idx_temp=npy.logical_not(idx_temp)
        vlad_feat_kk[idx_temp]=-vlad_feat_kk_ssr[idx_temp]
        vlad_feat[kk,:]=vlad_feat_kk/(npy.sqrt(npy.sum(vlad_feat_kk*vlad_feat_kk)+eps_float32))
    return vlad_feat
Example 5: remove_half_nearest_points
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def remove_half_nearest_points(self, center_points, data):
    k = KMeans(n_clusters=self.no_clusters)
    k.cluster_centers_ = np.array(center_points)
    assigned_clusters = k.predict(np.array(data))
    clusters = [ClusterCenter(c) for c in center_points]
    for i in range(0, len(assigned_clusters)):
        clusters[assigned_clusters[i]].add_point(data[i])
    ret = []
    for c in clusters:
        ret += c.get_half_farthest_points()
    return ret
Example 6: get_bow_feat
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def get_bow_feat(img_list,grid_spacing,patch_size,bow_model):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)
    num_words=bow_model.shape[0]
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)
    obj_kmeans.cluster_centers_=bow_model
    bow_feat=npy.zeros((len(img_list),num_words),dtype=npy.float32)
    for kk in range(len(img_list)):
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat=raw_feat_extractor.process_image(img,False,False)[0]
        label_feat=obj_kmeans.predict(raw_feat)
        bow_feat[kk,:]=get_hist(label_feat,npy.array([0,num_words-1]),num_words,True)
    return bow_feat
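get_hist in Example 6 is a project-specific helper that is not shown here; with plain NumPy, the same kind of normalized bag-of-words histogram can be sketched as follows (a stand-in for illustration, not the original helper):

import numpy as np

def bow_histogram(labels, num_words):
    # Count how often each visual word was assigned, then L1-normalize.
    hist = np.bincount(labels, minlength=num_words).astype(np.float32)
    return hist / max(hist.sum(), 1.0)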
Example 7: get_spm_feat
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def get_spm_feat(img_list,grid_spacing,patch_size,bow_model,pyramid_level):
    raw_feat_extractor=dsift.DsiftExtractor(grid_spacing,patch_size,1)
    num_words=bow_model.shape[0]
    dim_spm=num_words*(4**(pyramid_level+1)-1)//3  # integer division so the array size stays an int
    obj_kmeans=KMeans(num_words,'k-means++',3,500,0.001)
    obj_kmeans.cluster_centers_=bow_model
    spm_feat=npy.zeros((len(img_list),dim_spm),dtype=npy.float32)
    for kk in range(len(img_list)):
        img=imread(img_list[kk])
        if img.ndim==3:
            img=npy.mean(img,axis=2)
        raw_feat,pos_feat=raw_feat_extractor.process_image(img,False,False)
        label_feat=obj_kmeans.predict(raw_feat)
        spm_feat[kk,:]=get_spm_hist(label_feat,pos_feat,num_words,pyramid_level,img.shape)
    return spm_feat
Example 8: extract_features
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def extract_features(data):
    kmeans = KMeans()
    kmeans.cluster_centers_ = vCenters
    bovw = []
    for idx, image in enumerate(data):
        image_feature_desciptors = extract_HOG_descriptors_per_image(image)
        Y = kmeans.predict(image_feature_desciptors.T)
        vFeatures = np.zeros(vCenters.shape[0])
        for vfeature in Y:
            vFeatures[vfeature] += 1
        bovw.append(vFeatures)
    return np.asarray(bovw)
Example 9: readBespokeFile
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def readBespokeFile(infile):
    """Returns a Model namedtuple with all the model parts"""
    with open(infile, 'r') as modelfile:
        lines = iter(modelfile.read().splitlines())
    n_params = int(next(lines))
    metric_names = [next(lines) for i in range(n_params)]
    means = _stringToArray(next(lines))
    stdevs = _stringToArray(next(lines))
    rotation_matrix = _stringToArray(next(lines))
    models = []
    centroids = []
    try:
        while True:
            name = next(lines)  # kill a line
            centroids.append(_stringToArray(next(lines)))
            weights = _stringToArray(next(lines))
            functions = [LinearRegression.stringToFunction(next(lines))
                         for i in range(weights.shape[0])]
            models.append(LinearRegression.Model(functions, weights))
    except StopIteration:
        pass
    kmeans = KMeans(len(centroids))
    kmeans.cluster_centers_ = np.array(centroids)
    return Model(metric_names, means, stdevs, rotation_matrix, kmeans, models)
Example 10: run
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
#......... part of the code is omitted here .........
    json_root["std_devs"] = [stdev for stdev in stdevs.tolist()]
    json_root["rotation_matrix"] = [[elem for elem in row] for row in rotation_matrix.tolist()]
    json_root["clusters"] = []
    for i in range(n_clusters):
        cluster_profile = rotated_training_profile[clusters==i,:]
        cluster_performance = training_performance[clusters==i]
        regression = LinearRegression.LinearRegression(cluster_profile,
                                                       cluster_performance)
        pool = [LinearRegression.identityFunction()]
        for col in range(cluster_profile.shape[1]):
            if('inv_quadratic' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, -2))
            if('inv_linear' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, -1))
            if('inv_sqrt' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, -.5))
            if('sqrt' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, .5))
            if('linear' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, 1))
            if('quadratic' in args['regressor_functions']):
                pool.append(LinearRegression.powerFunction(col, 2))
            if('log' in args['regressor_functions']):
                pool.append(LinearRegression.logFunction(col))
            if('cross' in args['regressor_functions']):
                for xcol in range(col, cluster_profile.shape[1]):
                    pool.append(LinearRegression.crossFunction(col, xcol))
            if('div' in args['regressor_functions']):
                for xcol in range(col, cluster_profile.shape[1]):
                    pool.append(LinearRegression.divFunction(col,xcol))
                    pool.append(LinearRegression.divFunction(xcol,col))
        (models[i], r_squared, r_squared_adj) = regression.select(pool,
                                                                  threshold=args['threshold'],
                                                                  folds=args['nfolds'])
        # dump model to original file encoding
        modelfile.write('Model %s\n' % i)
        modelfile.write("[%s](%s)\n" % (rotation_matrix.shape[1],
                                        ','.join([str(center) for center in
                                                  kmeans.cluster_centers_[i].tolist()])))
        modelfile.write(repr(models[i]))
        modelfile.write('\n') # need a trailing newline
        # dump model for json encoding
        json_cluster = {}
        json_cluster["center"] = [center for center in kmeans.cluster_centers_[i].tolist()]
        # get models in json format
        json_cluster["regressors"] = models[i].toJSONObject()
        json_root["clusters"].append(json_cluster)
        print("Index\tMetric Name")
        print('\n'.join("%s\t%s" % metric for metric in enumerate(metric_names)))
        print("PCA matrix:")
        print(rotation_matrix)
        print("Model:\n" + str(models[i]))
        print("Finished modeling cluster %s:" % (i,))
        print("r squared = %s" % (r_squared,))
        print("adjusted r squared = %s" % (r_squared_adj,))
    # if we want to save the model file, copy it now
    if args['output'] == True:
        if args['json'] == True:
            with open(training_DC.name + '.model', 'w') as outfile:
                json.dump(json_root, outfile, indent=4)
        else:
            shutil.copy(modelfile.name, training_DC.name + '.model')
else:
    lines = iter(open(args['input'],'r').read().splitlines())
    n_params = int(next(lines))
    metric_names = [next(lines) for i in range(n_params)]
    means = _stringToArray(next(lines))
    stdevs = _stringToArray(next(lines))
    rotation_matrix = _stringToArray(next(lines))
    models = []
    centroids = []
    try:
        while True:
            name = next(lines)  # kill a line
            centroids.append(_stringToArray(next(lines)))
            weights = _stringToArray(next(lines))
            functions = [LinearRegression.stringToFunction(next(lines))
                         for i in range(weights.shape[0])]
            models.append(LinearRegression.Model(functions, weights))
    except StopIteration:
        pass
    kmeans = KMeans(len(centroids))
    kmeans.cluster_centers_ = np.array(centroids)
if(args['experiment_datacollection'] or args['test_fit']):
    DC = args['experiment_datacollection'] if \
        args['experiment_datacollection'] else args['training_datacollection']
    print("Running experiment on data collection %s..." % (DC,))
    experiment_DC = database.DataCollection(DC,
                                            args['db'])
    _runExperiment(kmeans, means, stdevs, models, rotation_matrix,
                   experiment_DC, args, metric_names)
print("Done!")
Example 11: _train
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
#......... part of the code is omitted here .........
        seed=self._settings['seed'])
ag.info('Extracting patches 2')
# Filter
th = self._settings['std_thresh']
if th > 0:
    gen = (x for x in gen if x.std() >= th)
rs = np.random.RandomState(0)
# Now request the patches and convert them to floats
#patches = np.asarray(list(itr.islice(gen, n_samples)), dtype=np.float64) / 255
patches = np.asarray(list(itr.islice(gen, n_samples)))
ag.info('Extracting patches 3')
from vzlog.default import vz
# Flatten the patches
pp = patches.reshape((patches.shape[0], -1))
C = X.shape[-1]
sh = (-1,) + self._part_shape + (C,)
if C <= 3:
    def plot(title):
        vz.section(title)
        grid = ag.plot.ColorImageGrid(pp[:1000].reshape(sh), rows=15)
        grid.save(vz.impath(), scale=3)
else:
    def plot(title): return
plot('Original patches')
# Standardize the patches
if self._settings['standardize']:
    pp = self._standardize_patches(pp)
plot('Standardized patches')
# Determine whitening coefficients
sigma = np.dot(pp.T, pp) / len(pp)
self._extra['sigma'] = sigma
if self._settings['whiten']:
    U, S, _ = np.linalg.svd(sigma)
    shrinker = np.diag(1 / np.sqrt(S + self.w_epsilon))
    #self._whitening_matrix = U @ shrinker @ U.T
    self._whitening_matrix = np.dot(U, np.dot(shrinker, U.T))
    # Now whiten the training patches
    pp = self.whiten_patches(pp)
else:
    self._whitening_matrix = None
plot('Whitened patches')
if self._settings['random_centroids']:
    rs = np.random.RandomState(self._settings['seed'])
    sh = (self._num_parts,) + self._part_shape
    self._parts = rs.normal(0, 1, size=sh)
    #self._parts /= ag.apply_once(np.mean, self._parts, [1, 2])
    return
else:
    # Run K-means
    from sklearn.cluster import KMeans, MiniBatchKMeans
    #cl = MiniBatchKMeans(
    cl = KMeans(
        n_clusters=self._num_parts,
        n_init=self._settings['n_init'],
        max_iter=self._settings['max_iter'],
        random_state=self._settings['seed'],
        #batch_size=50000,
        n_jobs=self._settings['n_jobs'],
    )
    ag.info('Training', self._num_parts, 'K-means parts')
    cl.fit(pp)
    ag.info('Done.')
    counts = np.bincount(cl.labels_, minlength=self._num_parts)
    ww = counts / counts.sum()
    HH = np.sum(-ww * np.log(ww))
    print('entropy', HH)
    II = np.argsort(counts)[::-1]
    cl.cluster_centers_ = cl.cluster_centers_[II]
    counts = counts[II]
    ag.info('counts', counts)
    self._parts = cl.cluster_centers_.reshape((-1,) + patches.shape[1:])
vz.section('Parts')
self._preprocess()
Example 12: KMeans
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 13 20:26:56 2015
@author: felix
"""
from sklearn.cluster import KMeans
import numpy as np
C = np.array([[1,1], [1,2], [2,1], [2,2], [5,1], [6,1], [5,2]])
centers = [[3,0], [5,0]]
clf = KMeans(init='k-means++', n_clusters=2, n_init=5)
clf.cluster_centers_ = centers
clf.fit(C)
print("centros: ", clf.cluster_centers_)
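Note that in Example 12 the assignment to clf.cluster_centers_ happens before fit(), so fit() recomputes the centroids and the assignment has no effect on the result (it also assigns a plain list where scikit-learn would produce an ndarray). To actually seed K-means with chosen starting centroids, the documented route is the init parameter, roughly like this (same toy data as above):

import numpy as np
from sklearn.cluster import KMeans

C = np.array([[1,1], [1,2], [2,1], [2,2], [5,1], [6,1], [5,2]])
centers = np.array([[3,0], [5,0]])
clf = KMeans(init=centers, n_clusters=2, n_init=1)  # n_init=1 since the initial centers are fixed
clf.fit(C)
print("centros:", clf.cluster_centers_)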
Example 13: KMeans
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
# run doughnut and regular k-means cluster alg and store metrics
clus = KMeans(n_clusters=k-1)
clus_reg = KMeans(n_clusters=k)
# run lloyds alg on regular and doughnuted data. Uses KMeans++
# method: max centroid distance.
clus.fit(data[clustered])
clus_reg.fit(data)
#------------ Deal with Labels
# Method 1: need to classify the held out according to closest centroids
held_labels = []
# append the centroid of heldout points
centroid = np.mean(data[heldout],axis=0)
clus.cluster_centers_ = np.append(clus.cluster_centers_,[centroid],axis=0)
# assign to cluster with closest centroid
for h in heldout:
    held_labels.append(np.linalg.norm(np.subtract(data[h], clus.cluster_centers_),axis=1).argmin())
# assign the heldouts according to held_labels to stitch labels back together
stitched_label = np.zeros(len(data), dtype=int)
for b in range(len(heldout)):
    stitched_label[heldout[b]] = held_labels[b]
for b in range(len(clustered)):
    stitched_label[clustered[b]] = clus.labels_[b]
#------------ at this point the labels of our doughnut method are titled stitched_label
# retrieve the prop of clusters and rsq (ratio between/within var)
Example 14: main
# Required import: from sklearn.cluster import KMeans [as alias]
# Or: from sklearn.cluster.KMeans import cluster_centers_ [as alias]
def main():
    start_date = datetime.datetime.now()
    search_date = start_date + datetime.timedelta(-30)
    week1_query ='''SELECT T1.uid_i as uid_i,ave as ave_f, special_crystal as special_crystal_f,
pve_consumable as pve_consumable_f, upgrade as upgrade_f, premium_hero as premium_hero_f, n_transactions_i,age_i
FROM
(SELECT uid_i, s_ave/total as ave, s_special_crystal/total as special_crystal, s_pve_consumable/total as pve_consumable,
s_upgrade/total as upgrade, s_premium_hero/total as premium_hero, n_transactions_i
FROM
(SELECT uid_i, SUM(ave) as s_ave, SUM(special_crystal) as s_special_crystal, sum(pve_consumable) as s_pve_consumable, sum(upgrade) as s_upgrade,
sum(premium_hero) as s_premium_hero,
(SUM(ave) + SUM(special_crystal) +sum(pve_consumable)+sum(upgrade)+sum(premium_hero)) as total, COUNT(*) as n_transactions_i
FROM
(SELECT uid_i, data_reason_desc_s,data_reason_pricing_id_s,
(case when left(data_reason_pricing_id_s,4) ='ave_' then data_item_q_i else 0 end) as ave,
(case when data_reason_pricing_id_s LIKE('%crystal%') and data_reason_pricing_id_s not LIKE('%golden%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%upsale%') then data_item_q_i
when data_reason_pricing_id_s like('rocket%') then data_item_q_i
else 0 end) as special_crystal,
(case when data_reason_pricing_id_s LIKE('%golden%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%upgrade%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%regen%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%arena%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%duel%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%key%') then data_item_q_i
when data_reason_pricing_id_s is null then data_item_q_i
else 0 end) upgrade,
(case when data_reason_pricing_id_s LIKE('health_potion%') then data_item_q_i
when data_reason_pricing_id_s LIKE('revive%') then data_item_q_i
when data_reason_pricing_id_s LIKE('team%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%questing_pack%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%booster%') then data_item_q_i
when data_reason_pricing_id_s LIKE('%pve_refill%') then data_item_q_i
else 0 end) as pve_consumable,
(case when data_reason_pricing_id_s LIKE('%premium_hero%') then data_item_q_i else 0 end) premium_hero,
FROM table_date_range(marvel_production_view.redeemer_transactions,timestamp(\''''+str(search_date)+'''\'),timestamp(\''''+str(start_date)+'''\'))
where counter_s = 'spend'
and data_item_n_s ='hc'
and data_reason_desc_s !='buyGift'
and data_reason_pricing_id_s !='fte_guaranteed'
and data_reason_pricing_id_s not LIKE('hero_crystal%')
and data_reason_pricing_id_s !='alliance_create_cost_b')
GROUP EACH BY 1)) T1
JOIN EACH
(SELECT uid_i, DATEDIFF(timestamp(\''''+str(start_date)+'''\'),time_join_t) as age_i
FROM marvel_production_view.users
where time_join_t < timestamp(\''''+str(search_date)+'''\')) T2
ON T1.uid_i = T2.uid_i
'''
    print('performing query ...')
    df_dimensions_collapsed_w1 = gbq_large.read_gbq(week1_query,project_id='mcoc-bi',destination_table='datascience_view.clusters_tmp')
    df_dimensions_collapsed_w1=df_dimensions_collapsed_w1.fillna(0)
    df_dimensions = df_dimensions_collapsed_w1[['ave_f','special_crystal_f','pve_consumable_f','upgrade_f','premium_hero_f']]
    est_c = KMeans(n_clusters=10)
    print('clustering ...')
    est_c.cluster_centers_ = np.asarray([[ 0.02694769, 0.06531768, 0.06121219, 0.82539261, 0.02112983],
                                         [ 0.05772959, 0.37772436, 0.09730477, 0.40487444, 0.06236684],
                                         [ 0.08125626, 0.29389585, 0.42306508, 0.12683245, 0.07495037],
                                         [ 0.01135739, 0.08087575, 0.0494629 , 0.0646581 , 0.79364585],
                                         [ 0.51941725, 0.14303638, 0.15421209, 0.14783146, 0.03550281],
                                         [ 0.00832494, 0.91744861, 0.02002415, 0.03100689, 0.02319541],
                                         [ 0.06583563, 0.62194053, 0.09732582, 0.12262572, 0.0922723 ],
                                         [ 0.08316859, 0.09417081, 0.33420608, 0.44578459, 0.04266993],
                                         [ 0.03944744, 0.05819858, 0.79046582, 0.09186975, 0.02001841],
                                         [ 0.04018328, 0.35265425, 0.08800917, 0.11709595, 0.40205735]])
    labels_c=est_c.predict(df_dimensions)
    print('post processing ...')
    df_dimensions_collapsed_w1['cluster_label_i'] = labels_c
    df_write = df_dimensions_collapsed_w1
    df_write['ave_f'] = df_write.ave_f.apply(lambda x: np.fabs(x))
    df_write['special_crystal_f'] = df_write.special_crystal_f.apply(lambda x: np.fabs(x))
    df_write['pve_consumable_f'] = df_write.pve_consumable_f.apply(lambda x: np.fabs(x))
    df_write['upgrade_f'] = df_write.upgrade_f.apply(lambda x: np.fabs(x))
    df_write['premium_hero_f'] = df_write.premium_hero_f.apply(lambda x: np.fabs(x))
    df_write['_ts_t'] = start_date.strftime('%Y-%m-%d %H:%M:%S')
    filename_str = 'segmentation.csv'
    table_write = 'mcoc-bi:marvel_bi.user_segmentation_historical'+ start_date.strftime('%Y%m%d')
    print('writing csv ...')
    df_write.to_csv(filename_str,index=False)
    print('bq loading ...')
    subprocess.call("bq load --source_format=CSV --skip_leading_rows=1 "+table_write+ " " + filename_str + " uid_i:integer,ave_f:float,special_crystal_f:float,pve_consumable_f:float,upgrade_f:float,premium_hero_f:float,n_transactions_i:integer,age_i:integer,cluster_label_i:integer,_ts_t:timestamp",shell=True)