This page collects typical code examples of the StandardScaler.astype pattern from Python's sklearn.preprocessing module. If you are wondering what StandardScaler.astype does, how to use it, or where to find working examples, the curated samples below may help. Strictly speaking, astype is a NumPy ndarray method invoked on the array that StandardScaler.fit_transform returns, not a method of StandardScaler itself; every example below uses it that way. You can also read more about the containing class, sklearn.preprocessing.StandardScaler.
Four code examples of StandardScaler.astype follow, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code samples.
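Before the examples, a minimal sketch of the pattern itself (the input matrix below is made up for illustration): fit_transform standardizes the data and returns a float64 array, and astype then converts that array to whatever dtype downstream code expects.
import numpy as np
from sklearn.preprocessing import StandardScaler

# Hypothetical 3x2 input: each column is standardized to zero mean, unit variance.
X = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])

# fit_transform returns float64; astype converts the result, e.g. to float32
# for libraries or file formats that expect single precision.
X_scaled = StandardScaler().fit_transform(X).astype(np.float32)
print(X_scaled.dtype)  # float32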
Example 1: make_blobs
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import astype [as alias]
#
# Generate sample data for the DBSCAN test
#
# Lifted from http://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html#example-cluster-plot-dbscan-py
#
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs  # samples_generator was removed in newer scikit-learn releases
from sklearn.preprocessing import StandardScaler
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
                            random_state=0)
X = StandardScaler().fit_transform(X)
X = X.astype(np.float64)
db = DBSCAN(eps=0.3, min_samples=10, metric='l2', algorithm='brute').fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
with open('dbscan.csv', 'w') as fscanout:
    with open('dbscan_labels.csv', 'w') as fscanlabout:
        for i in range(750):
            fscanout.write(",".join([str(x) for x in X[i, :]]) + "\n")
            fscanlabout.write(str(labels[i]) + "\n")
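The metrics import above goes unused in this snippet; in the upstream scikit-learn example it evaluates the clustering against the ground-truth labels from make_blobs. A short sketch of that evaluation, reusing the variables already defined:
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)  # noise is labelled -1
print('Estimated number of clusters: %d' % n_clusters_)
print('Adjusted Rand Index: %0.3f' % metrics.adjusted_rand_score(labels_true, labels))
print('Silhouette Coefficient: %0.3f' % metrics.silhouette_score(X, labels))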
Example 2: cluster
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import astype [as alias]
# Imports needed by this snippet (the method belongs to a larger class):
import statistics

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.manifold import MDS
from sklearn.preprocessing import StandardScaler

def cluster(self):
    cluster_file = open("cluster.txt", "w")
    print(self.data.shape)
    X = self.data
    X = StandardScaler().fit_transform(X)
    db = DBSCAN(eps=10, min_samples=2).fit(X)
    labels = db.labels_
    print(labels)
    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    index = []
    fitness = []
    mean = []
    print("number of estimated clusters : %d" % n_clusters_)
    cluster_file.write("number of estimated clusters : %d" % n_clusters_ + "\n")
    for k in range(n_clusters_):
        my_members = (labels == k)
        for i in range(len(X)):
            if my_members[i]:
                index += [i]
                if self.pure_data is not None:  # was "!= None"; identity comparison is the idiomatic test
                    # Every pair of rows in X maps back to one individual in pure_data.
                    if i % 2 == 0:
                        num = int(i / 2)
                    else:
                        num = int((i - 1) / 2)
                    fitness += [self.pure_data[num].fitness.values]
        if fitness != []:
            for i in range(len(fitness[0])):
                mean += [statistics.mean([ind[i] for ind in fitness])]
        cluster_file.write("index:" + "\n")
        cluster_file.write(str(index) + "\n")
        cluster_file.write("fitness:" + "\n")
        cluster_file.write(str(fitness) + "\n")
        cluster_file.write("mean fitness:" + "\n")
        cluster_file.write(str(mean) + "\n")
        cluster_file.write("members:" + "\n")
        cluster_file.write(str(X[my_members]) + "\n")
        print(index)
        print("members:")
        print(X[my_members])
        print("fitness:" + "\n")
        print(str(fitness) + "\n")
        index = []
        fitness = []
        mean = []
    # Project the scaled data to 2-D for visual inspection of the clusters.
    mds = MDS(n_components=2)
    pos = mds.fit_transform(X.astype(np.float64))
    import matplotlib.pyplot as plt
    colors = list('bgrcmykbgrcmykbgrcmykbgrcmyk')
    plt.figure(2)
    for i in range(len(pos[:, 0])):
        plt.plot(pos[i, 0], pos[i, 1], 'o', markerfacecolor=colors[labels[i]], markeredgecolor='k')
    plt.figure(1)
    plt.title("number of estimated clusters : %d" % n_clusters_)
    colors_cluster = [colors[labels[i]] for i in range(len(X))]
    k = [i for i in range(len(X))]
    for j in range(9):
        plt.subplot(331 + j)  # was 330 + j; subplot indices are 1-based, so j=0 must map to 331
        plt.ylabel(label[j])  # `label` is assumed to be defined elsewhere in the class
        # plt.ylim(limits[j][0], limits[j][1])
        plt.bar(k, X[:, j], color=colors_cluster)  # was X[:][j], which selects a row rather than a column
    plt.show()
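One subtlety in the plotting code above: DBSCAN labels noise points -1, so colors[labels[i]] silently maps noise to the last entry of the colour list. If you want noise drawn distinctly, an explicit mapping (a sketch reusing the names above) is clearer:
# Draw noise in black instead of recycling the last colour in the list.
point_colors = ['k' if lab == -1 else colors[lab] for lab in labels]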
Example 3: make_blobs
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import astype [as alias]
import numpy as np
#from sklearn.cluster import DBSCAN
#from sklearn import metrics
from sklearn.datasets import make_blobs  # samples_generator was removed in newer scikit-learn releases
from sklearn.preprocessing import StandardScaler
##############################################################################
# Generate sample data
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
                            random_state=0)
X = StandardScaler().fit_transform(X)
X = X.astype(np.float32)
##############################################################################
# Compute DBSCAN
import dbscan  # a custom DBSCAN implementation, not scikit-learn's DBSCAN
labels = np.array(dbscan.dbscan(X, "sparse").run(0.3, 10))
core_samples_mask = np.zeros_like(labels, dtype=bool)
# core_samples_mask[db.core_sample_indices_] = True
# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)
##############################################################################
# Plot result
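The plotting section is cut off in the original. A minimal sketch of what it might contain, modelled on the scikit-learn DBSCAN example this snippet is derived from (reusing X, labels, and n_clusters_ from above):
import matplotlib.pyplot as plt

unique_labels = set(labels)
palette = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for k, col in zip(unique_labels, palette):
    if k == -1:
        col = 'k'  # black for noise points
    xy = X[labels == k]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k')
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()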
Example 4: Imputer
# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import astype [as alias]
import numpy as np
import pandas as pd
from sklearn.preprocessing import Imputer, StandardScaler  # Imputer lived in sklearn.preprocessing in older releases; newer ones provide sklearn.impute.SimpleImputer instead

path = '/Users/zhangweijian01/Downloads/data.csv'
ori_data = pd.read_csv(path, header=0, sep='\t')
y_data = ori_data['Y']
x_data = ori_data.iloc[:, 3:]  # .ix was removed from pandas; .iloc selects columns by position
x_data = x_data.fillna(x_data.mean())
y_data = y_data.fillna(y_data.mean())
# to handle missing values
imp = Imputer(missing_values='NaN', strategy='median', axis=0)
imp.fit(x_data)
data_imp = imp.transform(x_data)
x_scaler = data_imp
# scale the features
x_scaler = StandardScaler().fit_transform(data_imp)
x_scaler = x_scaler.astype(np.float64, copy=False)
for i in range(0, len(x_scaler)):
    for j in range(0, len(x_scaler[i])):
        x_scaler[i][j] = float('%.4f' % (x_scaler[i][j]))
# save preprocessed data to file
np.savetxt("newdata2.csv", x_scaler, delimiter=",")
f = open('newdata1.csv', 'w')
for i in range(0, len(x_scaler)):
    line = str(y_data[i])
    for j in range(0, len(x_scaler[i])):
        line = line + ',' + str(x_scaler[i][j])
    line += '\n'
    f.write(line)
f.close()
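The manual export loop above rebuilds what pandas already provides; an equivalent and less error-prone version using DataFrame.to_csv (a sketch assuming x_scaler and y_data as defined above, producing the same Y,feature,... row layout) might look like:
# Round to 4 decimals, prepend the target column, write without header or index.
out = pd.DataFrame(np.round(x_scaler, 4))
out.insert(0, 'Y', y_data.values)
out.to_csv('newdata1.csv', header=False, index=False)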