This article collects typical usage examples of sklearnPCA in Python. sklearnPCA is not a standalone function in sklearn.decomposition; it is the conventional alias created by "from sklearn.decomposition import PCA as sklearnPCA". If you are wondering how to call sklearnPCA, which parameters it takes, or what real-world usage looks like, the hand-picked examples below should help.
Below are 15 code examples of sklearnPCA, ordered by popularity by default.
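Before the project-specific examples, here is a minimal, self-contained sketch of the pattern nearly all of them follow (the data is random and purely illustrative): standardize the features, fit a PCA, and inspect the explained variance.

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sklearnPCA

# Illustrative data: 100 samples, 5 features (random, for demonstration only)
X = np.random.RandomState(0).rand(100, 5)

# Standardize features to zero mean and unit variance before PCA
X_std = StandardScaler().fit_transform(X)

# Keep the first 2 principal components
sklearn_pca = sklearnPCA(n_components=2)
X_reduced = sklearn_pca.fit_transform(X_std)

print(X_reduced.shape)                        # (100, 2)
print(sklearn_pca.explained_variance_ratio_)  # variance explained by each component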
Example 1: pca_step_na
def pca_step_na(trans_std, promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    # transaction PCA
    trans_pca = sklearnPCA(n_components=8)
    trans_new = trans_pca.fit_transform(trans_std)
    # promo PCA
    promo_pca = sklearnPCA(n_components=12)
    promo_new = promo_pca.fit_transform(promo_std)
    pca_dict = {"trans": trans_pca, "promo": promo_pca}
    return trans_new, promo_new, pca_dict
Example 2: pca_step
def pca_step(trans_std, food_std, promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    # transaction PCA
    trans_pca = sklearnPCA(n_components=9)
    trans_new = trans_pca.fit_transform(trans_std)
    # food PCA
    food_pca = sklearnPCA(n_components=24)
    food_new = food_pca.fit_transform(food_std)
    # promo PCA
    promo_pca = sklearnPCA(n_components=13)
    promo_new = promo_pca.fit_transform(promo_std)
    pca_dict = {"trans": trans_pca, "food": food_pca, "promo": promo_pca}
    return trans_new, food_new, promo_new, pca_dict
Example 3: reduceDataset
def reduceDataset(self, nr=3, method='PCA'):
    '''Reduce the dimensionality of the stored dataset using one of the
    techniques provided by scikit-learn.
    Methods available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf', 'KPCApoly'
        'KPCAcosine', 'KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    # PCA
    if method == 'PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    # Factor Analysis
    elif method == 'FactorAnalysis':
        fa = FactorAnalysis(n_components=nr)
        reduced = fa.fit_transform(dataset)
    # kernel PCA with RBF kernel
    elif method == 'KPCArbf':
        kpca = KernelPCA(nr, kernel='rbf')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with polynomial kernel
    elif method == 'KPCApoly':
        kpca = KernelPCA(nr, kernel='poly')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with cosine kernel
    elif method == 'KPCAcosine':
        kpca = KernelPCA(nr, kernel='cosine')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with sigmoid kernel
    elif method == 'KPCAsigmoid':
        kpca = KernelPCA(nr, kernel='sigmoid')
        reduced = kpca.fit_transform(dataset)
    # Incremental PCA
    elif method == 'IPCA':
        ipca = IncrementalPCA(nr)
        reduced = ipca.fit_transform(dataset)
    # Fast ICA
    elif method == 'FastICAParallel':
        fip = FastICA(nr, algorithm='parallel')
        reduced = fip.fit_transform(dataset)
    elif method == 'FastICADeflation':
        fid = FastICA(nr, algorithm='deflation')
        reduced = fid.fit_transform(dataset)
    elif method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self
    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
Example 4: dataframe_components
def dataframe_components(df2, lon, columns):
    import numpy as np
    import pandas as pd
    from sklearn import tree
    from sklearn import metrics
    from sklearn import cross_validation
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA as sklearnPCA
    X = df2.values
    from sklearn.preprocessing import StandardScaler
    # standardize the features, then project onto the first lon principal components
    X_std = StandardScaler().fit_transform(X)
    pca = sklearnPCA(n_components=lon).fit_transform(X_std)
    list_comp_pca = []
    # build a DataFrame holding both the standardized features and the principal components
    for i in range(0, lon):
        v = "Componente" + str(i)
        list_comp_pca.append(v)
    dd1 = pd.DataFrame(X_std, columns=columns)
    dd2 = pd.DataFrame(pca, columns=list_comp_pca)
    df3 = pd.concat([dd1, dd2], axis=1)
    return df3
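As a hypothetical usage of dataframe_components (the toy DataFrame below is made up for illustration), keeping the first two principal components alongside the standardized features:

import pandas as pd

# toy numeric DataFrame (illustrative data only)
df2 = pd.DataFrame({
    "a": [1.0, 2.0, 3.0, 4.0],
    "b": [2.0, 1.0, 4.0, 3.0],
    "c": [0.5, 0.7, 0.2, 0.9],
})

df3 = dataframe_components(df2, lon=2, columns=list(df2.columns))
print(df3.head())  # standardized a, b, c plus Componente0 and Componente1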
Example 5: pca
def pca(self):
    # remove the WHERE clause once the table is cleaned up to drop header rows
    statement = (
        """SELECT transcript_id, TPM, sample_id FROM %s
        where transcript_id != 'Transcript' """
        % self.table
    )
    # fetch data
    df = self.getDataFrame(statement)
    # pivot the dataframe so that rows = transcripts, columns = samples, cells = TPM
    pivot_df = df.pivot(index="transcript_id", columns="sample_id")["TPM"]
    # drop transcripts whose TPM is 0 across all samples
    filtered_df = pivot_df[pivot_df.sum(axis=1) > 0]
    # add a small pseudocount (0.1) and log transform the data
    logdf = np.log(filtered_df + 0.1)
    # scale the dataframe so that each row has unit variance
    logscaled = sklearn_scale(logdf, axis=1)
    # turn the array back into a dataframe and restore the transcript ids as index
    logscaled_df = pd.DataFrame(logscaled)
    logscaled_df.index = list(logdf.index)
    # now run the PCA; n_components is configurable via self.n_components
    sklearn_pca = sklearnPCA(n_components=self.n_components)
    sklearn_pca.fit(logscaled_df)
    index = logdf.columns
    return sklearn_pca, index
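The same transform chain (filter all-zero rows, add a pseudocount and log-transform, scale each row to unit variance, then PCA) can be tried on a small made-up TPM matrix; the data below is invented, only the preprocessing and PCA calls mirror the method above:

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale as sklearn_scale
from sklearn.decomposition import PCA as sklearnPCA

# made-up TPM matrix: 4 transcripts x 3 samples
pivot_df = pd.DataFrame(
    [[10.0, 12.0, 9.0],
     [0.0, 0.0, 0.0],        # all-zero transcript, filtered out below
     [5.0, 1.0, 7.0],
     [100.0, 80.0, 120.0]],
    index=["t1", "t2", "t3", "t4"],
    columns=["s1", "s2", "s3"],
)

filtered_df = pivot_df[pivot_df.sum(axis=1) > 0]   # drop all-zero rows
logdf = np.log(filtered_df + 0.1)                  # pseudocount + log transform
logscaled = sklearn_scale(logdf, axis=1)           # unit variance per row

sklearn_pca = sklearnPCA(n_components=2)
sklearn_pca.fit(pd.DataFrame(logscaled, index=logdf.index))
print(sklearn_pca.explained_variance_ratio_)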
Example 6: pca_analysis
def pca_analysis(indexname, dataframe):
    df = dataframe
    column_count = len(df.columns)
    # first column holds the zip codes, the remaining columns are the features
    X = df.iloc[:, 1:column_count].values
    zip_codes = df.iloc[:, 0].values
    # standardize data
    X_std = StandardScaler().fit_transform(X)
    # generate PCA components
    sklearn_pca = sklearnPCA(n_components=1)
    Y_sklearn = sklearn_pca.fit_transform(X_std)
    explained_ratio = sklearn_pca.explained_variance_ratio_
    covariance_array = sklearn_pca.get_covariance()
    df_final = pd.DataFrame({'zip5': zip_codes, indexname: Y_sklearn[:, 0]})
    # normalize data on a 0 to 1 scale
    #zip5_final = df_final['zip5'].values
    #minmax_scale = preprocessing.MinMaxScaler().fit(df_final[[indexname]])
    #minmax = minmax_scale.transform(df_final[[indexname]])
    #df_minmax = pd.DataFrame({'zip5':zip5_final,indexname:minmax[:,0]})
    return df_final
Example 7: kmeans
def kmeans():
    import csv
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA as sklearnPCA
    yeast_t = 7   # number of k-means iterations
    yeast_k = 6   # number of clusters
    # load the yeast expression data (614 genes x 7 conditions)
    yeastData = np.empty([614, 7], dtype=float)
    with open('YeastGene.csv', 'r') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastData[i] = row
            i += 1
    #print yeastData
    # load the initial centroids
    yeastCentroid = np.empty([yeast_k, 7], dtype=float)
    with open('YeastGene_Initial_Centroids.csv', 'r') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastCentroid[i] = row
            i += 1
    #print yeastCentroid
    for t in range(0, yeast_t):
        yeast_c = [[] for i in range(0, yeast_k)]
        minCentroid = []
        # assign each gene to its closest centroid
        for arr in yeastData:
            for cen in yeastCentroid:
                minCentroid.append(np.linalg.norm(arr - cen))
            yeast_c[minCentroid.index(min(minCentroid))].append(arr)
            minCentroid = []
        # recompute each centroid as the mean of its cluster
        for k in range(0, yeast_k):
            yeastCentroid[k] = [float(sum(l)) / len(l) for l in zip(*yeast_c[k])]
        #print "The new yeast Centroid values\n"
        #print yeastCentroid
        #print "The cluster sizes are - "
        print(len(yeast_c[0]), len(yeast_c[1]), len(yeast_c[2]), len(yeast_c[3]), len(yeast_c[4]), len(yeast_c[5]))
    # stack the clusters so that points from the same cluster are contiguous
    clusters = np.zeros([614, 7], dtype=float)
    prev_len = 0
    for i in range(0, 6):
        for j in range(0, len(yeast_c[i])):
            clusters[prev_len] = yeast_c[i][j]
            prev_len += 1
    # project the clustered data onto the first two principal components for plotting
    sklearn_pca = sklearnPCA(n_components=2)
    transf = sklearn_pca.fit_transform(clusters)
    plt.plot(transf[0:140, 0], transf[0:140, 1], '*', markersize=7, color='blue', alpha=0.5, label='cluster 1')
    plt.plot(transf[140:191, 0], transf[140:191, 1], '*', markersize=7, color='red', alpha=0.5, label='cluster 2')
    plt.plot(transf[191:355, 0], transf[191:355, 1], '*', markersize=7, color='green', alpha=0.5, label='cluster 3')
    plt.plot(transf[355:376, 0], transf[355:376, 1], '*', markersize=7, color='indigo', alpha=0.5, label='cluster 4')
    plt.plot(transf[376:538, 0], transf[376:538, 1], '*', markersize=7, color='yellow', alpha=0.5, label='cluster 5')
    plt.plot(transf[538:614, 0], transf[538:614, 1], '*', markersize=7, color='black', alpha=0.5, label='cluster 6')
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])
    plt.legend()
    plt.title("Kmeans")
    plt.show()
Example 8: dimensionalityReduction
def dimensionalityReduction(self, nr=5):
    '''Apply all the dimensionality reduction techniques available in this class.
    Techniques available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf', 'KPCApoly'
        'KPCAcosine', 'KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)
    fa = FactorAnalysis(n_components=nr)
    factors = fa.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='rbf')
    rbf = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='poly')
    poly = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='cosine')
    cosine = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='sigmoid')
    sigmoid = kpca.fit_transform(dataset)
    ipca = IncrementalPCA(nr)
    i_components = ipca.fit_transform(dataset)
    fip = FastICA(nr, algorithm='parallel')
    fid = FastICA(nr, algorithm='deflation')
    ficaP = fip.fit_transform(dataset)   # parallel FastICA
    ficaD = fid.fit_transform(dataset)   # deflation FastICA
    '''isomap=Isomap(n_components=nr).fit_transform(dataset)
    try:
        lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
    except ValueError:
        lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
    try:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
    except ValueError:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset)
    try:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
    except ValueError:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
    values = [p_components, factors, rbf, poly, cosine, sigmoid, i_components, ficaD, ficaP]  # ,isomap,lle1,lle2,lle3]
    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine', 'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']  # ,'Isomap','LLE','LLEmodified','LLEltsa']
    self.ModelInputs.update(dict(zip(keys, values)))
    [self.datasetsAvailable.append(key) for key in keys]
    # debug
    #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    #dataset['Output']=self.ModelOutput
    #self.debug['Dimensionalityreduction']=dataset
    ###
    return self
Example 9: pcaDecomp
def pcaDecomp(data, normalize=True):
    if normalize:
        data = StandardScaler().fit_transform(data)
    pca = sklearnPCA(n_components=2)
    decomp = pca.fit_transform(data)
    # plt.scatter(data[:,0], data[:,1])
    # plt.show()
    histo2d(decomp, ranged=False)
Example 10: apply_pca
def apply_pca(data):
    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(data)
    from sklearn.decomposition import PCA as sklearnPCA
    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X_std)
    return Y_sklearn
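A hypothetical call of apply_pca, only to show the expected input and output shapes (the array is random, illustrative data):

import numpy as np

data = np.random.RandomState(0).rand(50, 10)  # 50 samples, 10 features (made-up data)
Y = apply_pca(data)
print(Y.shape)  # (50, 2): each sample reduced to its first two principal components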
Example 11: pca
def pca(self, samples):
    '''
    Apply PCA from sklearn.
    '''
    sklearn_pca = sklearnPCA(n_components=2)
    # Fit the model with samples
    fit = sklearn_pca.fit(samples)
    # Apply the dimensionality reduction to samples
    pca = fit.transform(samples)
    return pca
Example 12: pca_json
def pca_json(df, n_components=4, exp_var_min=.05):
    # run PCA on the samples (columns of df), keeping up to n_components components
    sklearn_pca = sklearnPCA(n_components=n_components)
    pca_points = sklearn_pca.fit_transform(df.T)
    # pc_to_keep, trim_pc and append_exp_var are helper functions defined elsewhere in the project
    exp_var, num_pc = pc_to_keep(sklearn_pca.explained_variance_ratio_,
                                 exp_var_min)
    pca_points_df = trim_pc(pca_points, num_pc)
    pca_points_df['sample'] = df.columns.values
    pca_points_df = append_exp_var(pc_df=pca_points_df,
                                   exp_var_list=exp_var,
                                   num_pc=num_pc)
    return pca_points_df
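pc_to_keep is not shown in this snippet. As a rough idea of its intent, a minimal sketch of such a helper — assuming it returns the explained-variance ratios above the threshold and the number of components to keep — might look like this (hypothetical, not the project's actual implementation):

def pc_to_keep(explained_variance_ratio, exp_var_min):
    # hypothetical helper: keep components whose explained variance ratio is at least exp_var_min
    exp_var = [v for v in explained_variance_ratio if v >= exp_var_min]
    num_pc = len(exp_var)
    return exp_var, num_pc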
Example 13: plotGraph
def plotGraph(samples, n_samples, tags, dimensions):
    colours = ['blue', 'red', 'green', 'yellow', 'black']
    n_tags = len(tags)
    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)
        for i in range(n_tags):
            plt.plot(sklearn_transf[i*n_samples:(i+1)*n_samples, 0], sklearn_transf[i*n_samples:(i+1)*n_samples, 1],
                     'o', markersize=7, color=colours[i], alpha=0.5, label=tags[i])
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        # plt.xlim([-4,4])
        # plt.ylim([-4,4])
        plt.legend()
        plt.title('PCA')
    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10
        for i in range(n_tags):
            ax.plot(sklearn_transf[i*n_samples:(i+1)*n_samples, 0], sklearn_transf[i*n_samples:(i+1)*n_samples, 1],
                    sklearn_transf[i*n_samples:(i+1)*n_samples, 2], 'o', markersize=8, color=colours[i], alpha=0.5, label=tags[i])
        plt.title('PCA')
        ax.legend(loc='upper right')
    # plt.savefig("%s.png" % (dimensions), bbox_inches='tight',dpi=200)
    plt.show()
    # plt.close()
    return True
Example 14: plotGraph
def plotGraph(samples, word, dimensions):
    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)
        plt.plot(sklearn_transf[:, 0], sklearn_transf[:, 1],
                 'o', markersize=7, color='blue', alpha=0.5, label='')
        # plt.plot(sklearn_transf[1::2,0], sklearn_transf[1::2,1],
        #          '^', markersize=7, color='red', alpha=0.5, label='Matrix')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        # plt.xlim([-4,4])
        plt.ylim([-.8, .8])
        plt.legend()
        plt.title('Word embeddings PCA')
        print(sklearn_transf)
    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10
        ax.plot(sklearn_transf[:, 0], sklearn_transf[:, 1],
                sklearn_transf[:, 2], 'o', markersize=8, color='blue', alpha=0.5, label='')
        # ax.plot(sklearn_transf[:,0], sklearn_transf[:,1],
        #         sklearn_transf[:,2], '^', markersize=8, alpha=0.5, color='red', label='Matrix')
        plt.title('Word embeddings PCA')
        ax.legend(loc='upper right')
        print(sklearn_transf)
    plt.savefig("%s-%s.png" % (word, dimensions), bbox_inches='tight', dpi=200)
    plt.close()
    return True
Example 15: Seleccion_Ratios
def Seleccion_Ratios(df):
    import numpy as np
    import pandas as pd
    from sklearn import tree
    #from sklearn import metrics
    from sklearn import cross_validation
    from sklearn.decomposition import PCA as sklearnPCA
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import RandomForestClassifier
    # Drop the target and id columns before computing the principal components.
    df.columns = [x.lower() for x in df.columns]
    objetivo = [col for col in df.columns if 'target' in col]
    objetivo = ''.join(objetivo)
    dfBorrar = df[['id', objetivo]]
    borrar = ['id', objetivo]
    dfaux = df.drop(borrar, axis=1)
    ListaColumnas = dfaux.columns
    tamDf = len(dfaux.columns)
    # Standardize the remaining features and compute as many components as there are features.
    X_std = StandardScaler().fit_transform(dfaux.values)
    pca = sklearnPCA(n_components=tamDf).fit_transform(X_std)
    columnas_pca = []
    for i in range(0, pca.shape[1]):
        v = "VAR_PCA_" + str(i)
        columnas_pca.append(v)
    # Concatenate the standardized features and the principal components into one frame.
    df1 = pd.DataFrame(X_std, columns=ListaColumnas)
    df2 = pd.DataFrame(pca, columns=columnas_pca)
    df_PCA = pd.concat([df1, df2], axis=1)
    # Rank all variables (original + PCA) by random forest feature importance.
    y = df[objetivo]
    forest = RandomForestClassifier(n_estimators=250, random_state=0)
    forest.fit(df_PCA, y)
    importances = forest.feature_importances_
    std = np.std([estimator.feature_importances_ for estimator in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]
    # Print the ranking of the 30 best ratios.
    print("TOP 30:")
    for f in range(30):
        print("%d. Ratio %s (%f)" % (f + 1, df_PCA.columns[indices[f]], importances[indices[f]]))