

Python StandardScaler.dot Method Code Examples

This article collects typical usage examples of the Python pattern sklearn.preprocessing.StandardScaler with .dot. If you are wondering what StandardScaler.dot looks like in practice, or how to use it, the curated examples below may help. You can also explore further usage examples of sklearn.preprocessing.StandardScaler.


Six code examples of the StandardScaler.dot pattern are shown below, ordered by popularity.
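Note that StandardScaler itself has no dot method: in every example below, .dot is the NumPy ndarray method, called on the standardized array that StandardScaler.fit_transform returns. A minimal sketch of that pattern with toy data (shapes and values are illustrative only):

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.rand(100, 5)                           # toy data: 100 samples, 5 features
X_std = StandardScaler().fit_transform(X)            # plain ndarray: zero mean, unit variance per column
eig_vals, eig_vecs = np.linalg.eig(np.cov(X_std.T))  # eigendecomposition of the covariance
W = eig_vecs[:, :2]                                  # two eigenvectors (real code sorts by eigenvalue first)
Y = X_std.dot(W)                                     # ndarray.dot -- the ".dot" in every example below
print(Y.shape)                                       # (100, 2)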

Example 1: pca

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]  (site template; not an importable path)
import numpy as np  # the function below also uses numpy

def pca(X, ndims=3):
	"""Runs PCA on the provided data, X, and returns the projection onto ndims principal components.
	This function assumes X has data series in columns.
	It also returns the covariance matrix of the standardized data (scaled to zero mean and unit variance), as well as the eigenvectors and eigenvalues of that matrix.

	Input:
		X : ndarray with data series in columns (e.g. one neuron's calcium trace (or DF/F) per column)
		ndims : the number of dimensions to project down to. Default is 3 for fancy 3d scatter plots.
	Output:
		Y : Projected, standardized data.
		cov_mat : Covariance matrix of the standardized data
		eig_pairs : a list of tuples of the form (eigenvalue, eigenvector), sorted high to low"""
	original_dims = X.shape[1]
	if ndims > original_dims:
		ndims = original_dims
	# StandardScaler standardizes each column independently to zero mean and unit variance
	X_std = StandardScaler().fit_transform(X)
	# use the standardized data for the covariance, as the docstring promises
	cov_mat = np.cov(X_std.T)
	eig_vals, eig_vecs = np.linalg.eig(cov_mat)
	eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
	eig_pairs.sort(key=lambda x: x[0], reverse=True)
	# np.hstack needs a sequence, not a generator, so build a list
	W = np.hstack([eig_pairs[i][1].reshape(original_dims, 1) for i in range(ndims)])
	Y = X_std.dot(W)
	return Y, cov_mat, eig_pairs
Author: AllenBrainAtlas, Project: SWDB-2016, Lines: 26, Source: main.py
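A hypothetical usage sketch for pca (synthetic data; names and shapes are illustrative):

import numpy as np
X = np.random.randn(500, 10)               # e.g. 500 timepoints x 10 calcium traces
Y, cov_mat, eig_pairs = pca(X, ndims=3)
print(Y.shape)                             # (500, 3): each timepoint projected into PC space
print(eig_pairs[0][0] >= eig_pairs[1][0])  # True: eigenvalues sorted high to low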

Example 2: Data

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]
# Legacy plotly.graph_objs API; trace1 and trace2 are explained-variance
# traces built earlier in the script (not shown in this fragment)
data = Data([trace1, trace2])

layout = Layout(
        yaxis=YAxis(title='Explained variance in percent'),
        title='Explained variance by different principal components')

fig = Figure(data=data, layout=layout)
py.iplot(fig)
#%%
# stack the top two eigenvectors (13 = number of features) into a projection matrix
matrix_w = np.hstack((eig_pairs[0][1].reshape(13,1),
                      eig_pairs[1][1].reshape(13,1)))

print('Matrix W:\n', matrix_w)

#%%
Y = X_std.dot(matrix_w)
traces = []

for name in (1, 2, 3):

    trace = Scatter(
        x=Y[y==name, 0],
        y=Y[y==name, 1],
        mode='markers',
        name=str(name),
        marker=Marker(
            size=12,
            line=Line(
                color='rgba(217, 217, 217, 0.14)',
                width=0.5),
            opacity=0.8))
    traces.append(trace)  # collect one trace per class (the append was cut off in the fragment)
Author: wardence, Project: SkillsWorkshop2017, Lines: 33, Source: yuqingwang.py
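This fragment presupposes X_std, eig_pairs, y, trace1 and trace2 from earlier in the script. The reshape(13, 1) calls suggest a 13-feature dataset such as UCI Wine, the dataset used in the Raschka tutorial this code follows; that is an assumption, and the sketch below fills in the missing setup under it:

import numpy as np
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler

wine = load_wine()                         # assumed dataset: 13 features, 3 classes
X, y = wine.data, wine.target + 1          # shift labels to 1..3 to match `y == name`
X_std = StandardScaler().fit_transform(X)
eig_vals, eig_vecs = np.linalg.eig(np.cov(X_std.T))
eig_pairs = sorted([(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))],
                   key=lambda p: p[0], reverse=True)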

Example 3: df_pca

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]
def df_pca(df_in, keep=None, expvar=False, rmoutliers=True, show=True,
           colorcol=None):
  """
  Run a simple PCA on the df features listed in keep.
  If expvar is True, a plot of explained variance is also shown.
  Heavily inspired by http://sebastianraschka.com/Articles/2015_pca_in_3_steps.html
  """
  # Assumes numpy (np), matplotlib.pyplot (plt) and the module's outlier()
  # helper are imported at module level.
  from sklearn.preprocessing import StandardScaler
  if keep is None:
    keep = ['maxV', 'maxDerivV', 'maxDerivdV', 'minDerivV', 
            'minDerivdV', 'preMinV', 'postMinV', 'preMaxCurveK', 
            'postMaxCurveK', 'postMaxCurveV', 'preMaxCurveV', 'height', 
            'repolarizationV', 'intervals']
  # Clean the data frame
  df = df_in.copy()
  for col in df.columns:
    if col not in keep:
      df = df.drop(columns=[col])
    elif col != colorcol and rmoutliers:
      # outlier() (defined elsewhere in this module) presumably NaNs out
      # extreme values, hence the dropna below
      df[col] = outlier(df[col].values)
  df = df.dropna()
  if colorcol is not None:
    colors = df[colorcol].values
    df = df.drop(columns=[colorcol])
  # Make into np.array with rows as samples and columns as features
  data = df.values
  data = StandardScaler().fit_transform(data) # Standardize data
  
  # run pca via SVD of the standardized data; the rows of `eigvecs` (= V^T)
  # are the principal directions, and the singular values satisfy
  # lambda_i = s_i**2 / (n - 1) for the covariance eigenvalues
  u, eigvals, eigvecs = np.linalg.svd(data, full_matrices=False)
  eigpairs = [(np.abs(eigvals[i]), eigvecs[i, :])
              for i in range(len(eigvals))]
  eigpairs.sort(key=lambda p: p[0], reverse=True)
  mat_w = np.hstack((eigpairs[0][1].reshape(eigvals.shape[0], 1),
                     eigpairs[1][1].reshape(eigvals.shape[0], 1)))
  Y = data.dot(mat_w) # project onto the first two principal components
  
  # Plot these data
  if show:
    contcols = ['lightskyblue', 'brown', 'orange', 'springgreen',
            'fuchsia', 'tomato', 'gold', 'indigo',
            'darkslateblue', 'black', 'darkgreen', 'aqua',
            'darkorchid', 'grey', 'salmon', 'plum',
            'coral', 'sienna', 'darkkhaki', 'yellowgreen',
            'deeppink', 'ivory', 'orchid', 'lightsteelblue']
    plt.figure()
    if colorcol is not None:
      try:
        colors = [contcols[list(set(colors)).index(u)] for u in colors]
      except IndexError: # more categories than listed colors; fall back to blue
        colors = 'blue'
    else:
      colors = 'blue'
    plt.scatter(Y[:,0], Y[:,1], color=colors, edgecolor='none',
                alpha=0.7)
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.tight_layout()
  
    # Explained variance: singular values come pre-sorted from the SVD;
    # square them to get variances before converting to percentages
    if expvar:
      var_exp = [v / sum(eigvals**2) * 100. for v in eigvals**2]
      cum_var_exp = np.cumsum(var_exp)
      #with plt.style.context('seaborn_whitegrid'):
      plt.figure()
      plt.bar(range(len(var_exp)), var_exp, alpha=0.5, align='center',
              label='individual explained variance')
      plt.step(range(len(cum_var_exp)), cum_var_exp, where='mid',
               label='cumulative explained variance')
      plt.xlabel('Principal components')
      plt.ylabel('Explained variance (%)')
      plt.legend(loc='best')
      plt.tight_layout()
    
    plt.show() # Show the plots
  return Y
Author: acsutt0n, Project: Drosophila, Lines: 84, Source: activityAnalysis.py
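A hypothetical call to df_pca (random data; assumes the module's outlier() helper is in scope, and uses a subset of the default keep columns):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(200, 3),
                  columns=['maxV', 'height', 'intervals'])
Y = df_pca(df, keep=['maxV', 'height', 'intervals'], show=False)
print(Y.shape)  # (rows surviving dropna, 2)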

Example 4:

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]
# explained-variance plot
plt.figure(figsize=(6, 4))
plt.bar(range(len(var_exp)), var_exp, alpha=0.5, align='center',
        label='Individual explained variance (%)')
plt.step(range(len(cum_var_exp)), cum_var_exp, where='mid',
         label='Cumulative explained variance (%)')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.tight_layout()

# use the first two PCs, project, and build the 2-D sample (18 = number of features)
matrix_w = np.hstack((eig_pairs[0][1].reshape(18,1),
                      eig_pairs[1][1].reshape(18,1)))
# projection onto the two PCs
Y_sklearn = X_std.dot(matrix_w)
data_2d = pd.DataFrame(Y_sklearn)
data_2d.index = data.index
data_2d.columns = ['PC1','PC2']

# part (e): mean and trend of each row of the original data, used to color the scatter
row_means = data.mean(axis=1)
row_trends = data.diff(axis=1).mean(axis=1)
# sequential-colormap scatter
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means, cmap='Blues')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

# diverging-colormap scatter
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means, cmap='seismic')
plt.xlabel('Principal Component 1')
Author: anblcode, Project: Tarea1, Lines: 33, Source: pca_tb.py
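The fragment assumes eig_pairs, var_exp, cum_var_exp, X_std and data already exist. A plausible construction, following the same recipe as the other examples (the 18-feature shape is taken from the reshape calls above; the data itself is a placeholder):

import numpy as np

X_std = np.random.randn(120, 18)                       # placeholder for the standardized data
eig_vals, eig_vecs = np.linalg.eigh(np.cov(X_std.T))   # eigh: the covariance matrix is symmetric
eig_pairs = sorted([(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))],
                   key=lambda p: p[0], reverse=True)
var_exp = [p[0] / sum(eig_vals) * 100 for p in eig_pairs]  # percent variance per component
cum_var_exp = np.cumsum(var_exp)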

Example 5: sum

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]
# Sanity check: with eigenvectors stored as the rows of E_vecs (np.linalg.eig
# returns them as columns, so a transpose is assumed upstream), this product
# recovers the diagonal matrix of eigenvalues of Sigma
E_vecs.dot(Sigma.dot(E_vecs.T))

#Analyse feature reduction / variance trade-off:
sum_evals = sum(E_vals)
retained_variance = [(i / sum_evals)*100 for i in sorted(E_vals, reverse=True)]
cum_retained_variance = np.cumsum(retained_variance)
#print(cum_retained_variance[1000], cum_retained_variance[5250], cum_retained_variance[7000], cum_retained_variance[10000])


#=============Prepare data for XGBoost==============================================================#
#Choose 5250 features giving 80% retained variance
i = 5250
# keep the i eigenvector rows with the largest eigenvalues (in ascending order)
sorted_reduced_evecs = E_vecs[np.argsort(E_vals)[-i:]]

#Determine reduced projection matrix for both (normalised) test and train
Xp = X.dot(sorted_reduced_evecs.T)
X_test_p = X_test.dot(sorted_reduced_evecs.T)
Xp_df = pd.DataFrame(Xp)
X_test_p_df = pd.DataFrame(X_test_p)

#Assemble Train, Test, y
X_train_cols = (training_join_df['App'], Xp_df)
X_test_cols = (test_data_df['App'], X_test_p_df)
y_train_cols = (training_join_df['Label'])
#training_join_df.loc['Desc']

X_train_df = pd.concat(X_train_cols, axis=1)
X_test_df = pd.concat(X_test_cols, axis=1)

#Convert to Array
train_X = X_train_df.values
Author: tijohnso, Project: Usyd_masters, Lines: 33, Source: mlass1.py
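A sketch of the setup this fragment appears to assume: Sigma is the covariance matrix of the standardized features, and E_vecs stores eigenvectors as rows (which is what the row indexing above requires); all shapes are placeholders:

import numpy as np

X = np.random.randn(300, 50)              # placeholder for the normalised training matrix
Sigma = np.cov(X.T)
E_vals, E_vecs = np.linalg.eigh(Sigma)    # eigh returns eigenvectors as columns...
E_vecs = E_vecs.T                         # ...so transpose: rows are eigenvectors, as indexed above
# under this convention the sanity check diagonalizes Sigma:
assert np.allclose(E_vecs.dot(Sigma.dot(E_vecs.T)), np.diag(E_vals))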

Example 6:

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import dot [as alias]
plt.figure(figsize=(6, 4))
plt.bar(range(len(var_exp)), var_exp, alpha=0.5, align='center',
        label='Individual explained variance (%)')
plt.step(range(len(cum_var_exp)), cum_var_exp, where='mid',
         label='Cumulative explained variance (%)')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.tight_layout()

# use the two largest principal components (22 = number of features)
matrix_w = np.hstack((eig_pairs[0][1].reshape(22,1),
                      eig_pairs[1][1].reshape(22,1)))

# generate the projection
Y_proy = X_std.dot(matrix_w)

# part (d)
data_2d = pd.DataFrame(Y_proy)
data_2d.index = data.index
data_2d.columns = ['PC1','PC2']


# part (e): mean and trend of each row of the original data
row_means = data.mean(axis=1)
row_trends = data.diff(axis=1).mean(axis=1)
# sequential-colormap scatter
data_2d.plot(kind='scatter', x='PC1', y='PC2', figsize=(16,8), c=row_means, cmap='Blues')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
# diverging-colormap scatter
Author: anblcode, Project: Tarea1, Lines: 33, Source: pca_vih.py


Note: The sklearn.preprocessing.StandardScaler.dot examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution or reuse should follow each project's license. Please do not republish without permission.