本文整理匯總了Python中sklearn.preprocessing.StandardScaler.dot方法的典型用法代碼示例。如果您正苦於以下問題:Python StandardScaler.dot方法的具體用法?Python StandardScaler.dot怎麽用?Python StandardScaler.dot使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.preprocessing.StandardScaler
的用法示例。
在下文中一共展示了StandardScaler.dot方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: pca
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
def pca(X, ndims=3):
    """Run PCA on X and return the projection onto ``ndims`` principal components.

    This function assumes X has data series in columns (e.g. one neuron's
    calcium trace (or DF/F) per column).

    Parameters
    ----------
    X : ndarray
        Data with one series per column.
    ndims : int
        Number of dimensions to project down to, clamped to the number of
        columns. Default is 3 for fancy 3d scatter plots.

    Returns
    -------
    Y : ndarray
        Projected, standardized data, shape (n_rows, ndims).
    cov_mat : ndarray
        Covariance matrix of the standardized (zero mean, unit variance) data.
    eig_pairs : list of tuple
        (eigenvalue magnitude, eigenvector) pairs sorted high to low.
    """
    original_dims = X.shape[1]
    if ndims > original_dims:
        ndims = original_dims
    # StandardScaler scales each column independently to zero mean / unit
    # variance, which is what we want for feature-wise standardization.
    X_std = StandardScaler().fit_transform(X)
    # Fix: compute the covariance of the STANDARDIZED data, as the docstring
    # promises; the original used np.cov(X.T) on the raw data while
    # projecting the standardized data, mixing two different bases.
    cov_mat = np.cov(X_std.T)
    eig_vals, eig_vecs = np.linalg.eig(cov_mat)
    eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i])
                 for i in range(len(eig_vals))]
    eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
    # Fix: np.hstack requires a sequence; passing a generator expression
    # raises TypeError on modern NumPy.
    W = np.hstack([eig_pairs[i][1].reshape(original_dims, 1)
                   for i in range(ndims)])
    Y = X_std.dot(W)
    return Y, cov_mat, eig_pairs
示例2: Data
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
# NOTE(review): fragment of a larger plotly notebook script — trace1, trace2,
# X_std, y, eig_pairs, np, py and the plotly names (Data, Layout, YAxis,
# Figure, Scatter, Marker, Line) are all defined/imported outside this
# excerpt.
data = Data([trace1, trace2])
# Explained-variance chart: bar per component plus cumulative line.
layout=Layout(
    yaxis=YAxis(title='Explained variance in percent'),
    title='Explained variance by different principal components')
fig = Figure(data=data, layout=layout)
py.iplot(fig)  # render inline in the notebook
#%%
# Stack the top-2 eigenvectors (13 features each, presumably the Wine
# dataset — TODO confirm) into a 13x2 projection matrix.
matrix_w = np.hstack((eig_pairs[0][1].reshape(13,1),
                      eig_pairs[1][1].reshape(13,1)))
print('Matrix W:\n', matrix_w)
#%%
# Project the standardized data onto the first two principal components.
Y = X_std.dot(matrix_w)
traces = []
# Build one scatter trace per class label.
# NOTE(review): the excerpt ends before `trace` is appended to `traces`.
for name in (1,2,3):
    trace = Scatter(
        x=Y[y==name,0],
        y=Y[y==name,1],
        mode='markers',
        name=name,
        marker=Marker(
            size=12,
            line=Line(
                color='rgba(217, 217, 217, 0.14)',
                width=0.5),
            opacity=0.8))
示例3: df_pca
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
def df_pca(df_in, keep=None, expvar=False, rmoutliers=True, show=True,
           colorcol=None):
    """Run a simple 2-component PCA on the columns of ``df_in`` named in ``keep``.

    Heavily inspired by
    http://sebastianraschka.com/Articles/2015_pca_in_3_steps.html

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input data, one feature per column.
    keep : list of str, optional
        Columns to include in the PCA; defaults to a fixed set of
        spike-shape features.
    expvar : bool
        If True, also plot the explained variance per component.
    rmoutliers : bool
        If True, de-outlier each kept column via the module-level
        ``outlier`` helper. (Fix: this flag was previously ignored and
        outlier removal always ran.)
    show : bool
        If True, scatter-plot the 2-D projection.
    colorcol : str, optional
        Column used only to color the scatter points; excluded from the PCA.

    Returns
    -------
    numpy.ndarray
        The data projected onto the first two principal components,
        shape (n_rows, 2).
    """
    from sklearn.preprocessing import StandardScaler
    if keep is None:
        keep = ['maxV', 'maxDerivV', 'maxDerivdV', 'minDerivV',
                'minDerivdV', 'preMinV', 'postMinV', 'preMaxCurveK',
                'postMaxCurveK', 'postMaxCurveV', 'preMaxCurveV', 'height',
                'repolarizationV', 'intervals']
    # Clean the data frame: drop unused columns, de-outlier the rest.
    df = df_in.copy()
    for col in df.columns:
        if col not in keep:
            # Fix: positional `axis` argument to drop() was removed in
            # pandas 2.0; use the keyword form.
            df = df.drop(columns=col)
        elif rmoutliers and col != colorcol:
            df[col] = outlier(df[col].values)
    df = df.dropna()
    if colorcol is not None:
        colors = df[colorcol].values
        df = df.drop(columns=colorcol)
    # Assemble as a (n_samples, n_features) array and standardize.
    data = np.array([df[col] for col in df.columns]).T
    data = StandardScaler().fit_transform(data)
    # PCA via SVD of the standardized data.
    u, eigvals, eigvecs = np.linalg.svd(data, full_matrices=False)
    # Fix: np.linalg.svd returns the right singular vectors as the ROWS of
    # its third output; the original indexed columns, projecting onto
    # meaningless directions.
    eigpairs = [(np.abs(eigvals[i]), eigvecs[i, :])
                for i in range(len(eigvals))]
    # Fix: sort on the singular value only; sorting the tuples directly
    # falls back to comparing the eigenvector arrays on ties, which raises
    # ValueError (ambiguous array truth value).
    eigpairs.sort(key=lambda pair: pair[0], reverse=True)
    mat_w = np.hstack((eigpairs[0][1].reshape(eigvals.shape[0], 1),
                       eigpairs[1][1].reshape(eigvals.shape[0], 1)))
    Y = data.dot(mat_w)  # project onto the first two components
    # Plot the 2-D projection.
    if show:
        contcols = ['lightskyblue', 'brown', 'orange', 'springgreen',
                    'fuchsia', 'tomato', 'gold', 'indigo',
                    'darkslateblue', 'black', 'darkgreen', 'aqua',
                    'darkorchid', 'grey', 'salmon', 'plum',
                    'coral', 'sienna', 'darkkhaki', 'yellowgreen',
                    'deeppink', 'ivory', 'orchid', 'lightsteelblue']
        plt.figure()
        if colorcol is not None:
            # Map each distinct color-column value to a named color.
            try:
                colors = [contcols[list(set(colors)).index(c)]
                          for c in colors]
            except Exception:
                # Fix: narrowed from a bare `except:` which also swallowed
                # SystemExit/KeyboardInterrupt. Falls back to a flat color
                # when there are more categories than named colors.
                colors = 'blue'
        else:
            colors = 'blue'
        plt.scatter(Y[:, 0], Y[:, 1], color=colors, edgecolor='none',
                    alpha=0.7)
        plt.xlabel('Principal Component 1')
        plt.ylabel('Principal Component 2')
        plt.tight_layout()
    # Explained variance (singular values come pre-sorted from SVD).
    # NOTE(review): these are singular values, not variances, so the
    # percentages are of total singular value — confirm that is intended.
    if expvar:
        var_exp = [i / sum(eigvals) * 100. for i in eigvals]
        cum_var_exp = np.cumsum(var_exp)
        plt.figure()
        plt.bar(range(len(var_exp)), var_exp, alpha=0.5, align='center',
                label='individual explained variance')
        plt.step(range(len(cum_var_exp)), cum_var_exp, where='mid',
                 label='cumulative explained variance')
        plt.xlabel('Principal components')
        # Fix: the original label contained '\%', an invalid escape
        # sequence (SyntaxWarning on Python 3.12+).
        plt.ylabel('Explained variance (%)')
        plt.legend(loc='best')
        plt.tight_layout()
    plt.show()  # show whatever figures were created
    return Y
示例4:
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
# NOTE(review): fragment of a larger script — var_exp, cum_var_exp,
# eig_pairs, X_std, data, np, pd and plt are all defined/imported outside
# this excerpt. Plot labels are in Spanish (individual / cumulative
# explained variance).
# Explained-variance chart for the first 4 components.
plt.figure(figsize=(6, 4))
plt.bar(range(4), var_exp, alpha=0.5, align='center',
        label='% Individual de Varianza Descrita')
plt.step(range(4), cum_var_exp, where='mid',
         label='% Acumulado de Varianza Descrita')
plt.ylabel('Radio de Varianza Explicada')
plt.xlabel('Componentes Principales')
plt.legend(loc='best')
plt.tight_layout()
# Use the first two principal components: stack the two 18-dimensional
# eigenvectors into an 18x2 projection matrix and project to 2-D.
matrix_w = np.hstack((eig_pairs[0][1].reshape(18,1),
                      eig_pairs[1][1].reshape(18,1)))
# Projection onto the 2 PCs.
Y_sklearn = X_std.dot(matrix_w)
data_2d = pd.DataFrame(Y_sklearn)
data_2d.index = data.index
data_2d.columns = ['PC1','PC2']
# (exercise part e)
# Per-row means and mean first-differences of the original data, used to
# color the scatter plots below.
row_means = data.mean(axis=1)
row_trends = data.diff(axis=1).mean(axis=1)
# Scatter of the projection, sequential colormap.
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means,cmap='Blues')
plt.xlabel('Componente Principal 1')
plt.ylabel('Componente Principal 2')
# Scatter of the projection, diverging colormap (excerpt cut off after this).
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means,cmap='seismic')
plt.xlabel('Componente Principal 1')
示例5: sum
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
# NOTE(review): fragment of a larger pipeline — E_vecs, E_vals, Sigma, X,
# X_test, training_join_df, test_data_df, np and pd are defined elsewhere.
# Reconstructs Sigma from its eigendecomposition; the result is discarded,
# so this looks like a leftover sanity check — TODO confirm intent.
E_vecs.dot(Sigma.dot(E_vecs.T))
# Analyse feature reduction / variance trade-off:
sum_evals = sum(E_vals)
# Percentage of total variance retained per eigenvalue, largest first.
retained_variance = [(i / sum_evals)*100 for i in sorted(E_vals, reverse=True)]
cum_retained_variance = np.cumsum(retained_variance)
#print(cum_retained_variance[1000], cum_retained_variance[5250], cum_retained_variance[7000], cum_retained_variance[10000])
#=============Prepare data for XGBoost==============================================================#
# Choose 5250 features giving 80% retained variance
i = 5250
# Keep the eigenvectors of the i largest eigenvalues.
# NOTE(review): this indexes ROWS of E_vecs; whether eigenvectors are stored
# as rows or columns depends on how E_vecs was produced upstream — verify.
sorted_reduced_evecs = E_vecs[np.argsort(E_vals)[-i:]]
# Determine reduced projection matrix for both (normalised) test and train
Xp = X.dot(sorted_reduced_evecs.T)
X_test_p = X_test.dot(sorted_reduced_evecs.T)
Xp_df = pd.DataFrame(Xp)
X_test_p_df = pd.DataFrame(X_test_p)
# Assemble Train, Test, y: prepend the 'App' identifier column to the
# projected features.
X_train_cols = (training_join_df['App'], Xp_df)
X_test_cols = (test_data_df['App'], X_test_p_df)
y_train_cols = (training_join_df['Label'])
#training_join_df.loc['Desc']
X_train_df = pd.concat(X_train_cols, axis=1)
X_test_df = pd.concat(X_test_cols, axis=1)
# Convert to array (excerpt is cut off after this).
train_X = X_train_df.values
示例6:
# Required import: from sklearn.preprocessing import StandardScaler
# (tutorial alias: sklearn.preprocessing.StandardScaler.dot)
# NOTE(review): fragment of a larger script — var_exp, cum_var_exp,
# eig_pairs, X_std, data, np, pd and plt are all defined/imported outside
# this excerpt. Plot labels are in Spanish.
# Explained-variance chart for the first 4 components.
plt.figure(figsize=(6, 4))
plt.bar(range(4), var_exp, alpha=0.5, align='center',
        label='% Individual de Varianza Descrita')
plt.step(range(4), cum_var_exp, where='mid',
         label='% Acumulado de Varianza Descrita')
plt.ylabel('Radio de Varianza Descrita')
plt.xlabel('Componentes Principales')
plt.legend(loc='best')
plt.tight_layout()
# Use the two largest principal components: stack the two 22-dimensional
# eigenvectors into a 22x2 projection matrix.
matrix_w = np.hstack((eig_pairs[0][1].reshape(22,1),
                      eig_pairs[1][1].reshape(22,1)))
# Generate the projection onto the two PCs.
Y_proy = X_std.dot(matrix_w)
# (exercise part d)
data_2d = pd.DataFrame(Y_proy)
data_2d.index = data.index
data_2d.columns = ['PC1','PC2']
# (exercise part e)
# Per-row means and mean first-differences of the original data, used to
# color the scatter below.
row_means = data.mean(axis=1)
row_trends = data.diff(axis=1).mean(axis=1)
# Scatter with a sequential colormap.
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means,cmap='Blues')
plt.xlabel('Componente Principal 1')
plt.ylabel('Componente Principal 2')
# Scatter with a diverging colormap (excerpt is cut off here).