

Python decomposition.sklearnPCA Code Examples

This article collects typical usage examples of sklearnPCA from Python's sklearn.decomposition. If you are wondering what sklearnPCA does or how to use it, the curated examples below should help. Note that sklearnPCA is not a function that scikit-learn itself exports: it is the PCA class imported under an alias, via from sklearn.decomposition import PCA as sklearnPCA, as several of the examples do explicitly.


Below are 15 code examples of sklearnPCA, sorted by popularity.
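
Before diving in, here is the pattern all of the examples share, as a minimal self-contained sketch (synthetic data, two components):

import numpy as np
from sklearn.decomposition import PCA as sklearnPCA

X = np.random.rand(100, 5)           # 100 samples, 5 features
pca = sklearnPCA(n_components=2)     # keep the first 2 principal components
X_2d = pca.fit_transform(X)          # shape (100, 2)
print(pca.explained_variance_ratio_) # fraction of variance per component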

Example 1: pca_step_na

def pca_step_na(trans_std, promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    # transaction PCA
    trans_pca = sklearnPCA(n_components=8)
    trans_new = trans_pca.fit_transform(trans_std)

    # promo PCA
    promo_pca = sklearnPCA(n_components=12)
    promo_new = promo_pca.fit_transform(promo_std)
    pca_dict = {"trans": trans_pca, "promo": promo_pca}
    return trans_new, promo_new, pca_dict
Developer: raincoatrun | Project: Rang-Tech-Data-Competition | Lines: 10 | Source: KFoldPCA.py

Example 2: pca_step

def pca_step(trans_std, food_std, promo_std):
    from sklearn.decomposition import PCA as sklearnPCA
    # transaction PCA
    trans_pca = sklearnPCA(n_components=9)
    trans_new = trans_pca.fit_transform(trans_std)

    # food PCA
    food_pca = sklearnPCA(n_components=24)
    food_new = food_pca.fit_transform(food_std)

    # promo PCA
    promo_pca = sklearnPCA(n_components=13)
    promo_new = promo_pca.fit_transform(promo_std)

    pca_dict = {"trans": trans_pca, "food": food_pca, "promo": promo_pca}
    return trans_new, food_new, promo_new, pca_dict
Developer: raincoatrun | Project: Rang-Tech-Data-Competition | Lines: 15 | Source: KFoldPCA.py

Example 3: reduceDataset

def reduceDataset(self, nr=3, method='PCA'):
    '''Reduce the dimensionality of the dataset using techniques from the
    sklearn library. Methods available:
                        'PCA'
                        'FactorAnalysis'
                        'KPCArbf', 'KPCApoly'
                        'KPCAcosine', 'KPCAsigmoid'
                        'IPCA'
                        'FastICADeflation'
                        'FastICAParallel'
                        'Isomap'
                        'LLE'
                        'LLEmodified'
                        'LLEltsa'
    (The Isomap and LLE variants are listed but not handled by this method.)
    '''
    # sklearnPCA, FactorAnalysis, KernelPCA, IncrementalPCA and FastICA are
    # imported at module level in the original Classes.py
    dataset = self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    # PCA
    if method == 'PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    # Factor Analysis
    elif method == 'FactorAnalysis':
        fa = FactorAnalysis(n_components=nr)
        reduced = fa.fit_transform(dataset)
    # kernel PCA with RBF kernel
    elif method == 'KPCArbf':
        kpca = KernelPCA(nr, kernel='rbf')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with polynomial kernel
    elif method == 'KPCApoly':
        kpca = KernelPCA(nr, kernel='poly')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with cosine kernel
    elif method == 'KPCAcosine':
        kpca = KernelPCA(nr, kernel='cosine')
        reduced = kpca.fit_transform(dataset)
    # kernel PCA with sigmoid kernel
    elif method == 'KPCAsigmoid':
        kpca = KernelPCA(nr, kernel='sigmoid')
        reduced = kpca.fit_transform(dataset)
    # incremental PCA
    elif method == 'IPCA':
        ipca = IncrementalPCA(nr)
        reduced = ipca.fit_transform(dataset)
    # Fast ICA
    elif method == 'FastICAParallel':
        fip = FastICA(nr, algorithm='parallel')
        reduced = fip.fit_transform(dataset)
    elif method == 'FastICADeflation':
        fid = FastICA(nr, algorithm='deflation')
        reduced = fid.fit_transform(dataset)
    elif method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self
    else:
        # an unrecognized method would otherwise leave 'reduced' undefined
        raise ValueError("Unknown reduction method: %s" % method)

    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
Developer: UIUC-SULLIVAN | Project: ThesisProject_Andrea_Mattera | Lines: 60 | Source: Classes.py

Example 4: dataframe_components

def dataframe_components(df2, lon, columns):

    import pandas as pd
    from sklearn.decomposition import PCA as sklearnPCA
    from sklearn.preprocessing import StandardScaler

    # standardize the raw values, then project onto 'lon' components
    X = df2.values
    X_std = StandardScaler().fit_transform(X)
    pca = sklearnPCA(n_components=lon).fit_transform(X_std)

    # build a DataFrame holding the principal components
    list_comp_pca = []
    for i in range(lon):
        v = "Componente" + str(i)
        list_comp_pca.append(v)

    dd1 = pd.DataFrame(X_std, columns=columns)
    dd2 = pd.DataFrame(pca, columns=list_comp_pca)
    df3 = pd.concat([dd1, dd2], axis=1)
    return df3
Developer: romcra | Project: ratios_pca_decisiontree | Lines: 27 | Source: ratios_pca_decisiontree.py

Example 5: pca

    def pca(self):
        # np, pd, sklearn_scale and sklearnPCA are imported at module level
        # in the original RnaseqqcReport.py

        # remove WHERE once the table is cleaned up to drop header rows
        statement = (
            """SELECT transcript_id, TPM, sample_id FROM %s
        where transcript_id != 'Transcript' """
            % self.table
        )

        # fetch data
        df = self.getDataFrame(statement)

        # pivot the dataframe so rows = genes, cols = samples, cells hold TPM
        pivot_df = df.pivot(index="transcript_id", columns="sample_id")["TPM"]

        # drop genes whose TPM is 0 across all samples
        filtered_df = pivot_df[pivot_df.sum(axis=1) > 0]

        # add a small pseudocount (0.1) and log-transform the data
        logdf = np.log(filtered_df + 0.1)

        # scale the dataframe so variance = 1 across rows
        logscaled = sklearn_scale(logdf, axis=1)

        # turn the array back into a df and restore transcript ids as index
        logscaled_df = pd.DataFrame(logscaled)
        logscaled_df.index = list(logdf.index)

        # now do the PCA - n_components is configurable
        sklearn_pca = sklearnPCA(n_components=self.n_components)
        sklearn_pca.fit(logscaled_df)

        index = logdf.columns

        return sklearn_pca, index
Developer: sudlab | Project: CGATPipelines | Lines: 35 | Source: RnaseqqcReport.py
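
Unlike most examples here, this method returns the fitted PCA object plus a sample index rather than transformed coordinates. A minimal sketch of what a caller could do with the result, assuming 'report' is an instance of the surrounding class (the name is illustrative, not from the original source):

# 'report' stands in for an instance of the class defining pca() above
sklearn_pca, index = report.pca()
for i, ratio in enumerate(sklearn_pca.explained_variance_ratio_, start=1):
    print("PC%d explains %.1f%% of the variance" % (i, 100 * ratio))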

Example 6: pca_analysis

def pca_analysis(indexname, dataframe):
    df = dataframe
    column_count = len(df.columns)

    X = df.iloc[:, 1:column_count].values
    zip_codes = df.iloc[:, 0].values  # first column holds the zip codes

    # Standardize data
    X_std = StandardScaler().fit_transform(X)

    # Generate PCA components
    sklearn_pca = sklearnPCA(n_components=1)
    Y_sklearn = sklearn_pca.fit_transform(X_std)

    explained_ratio = sklearn_pca.explained_variance_ratio_
    covariance_array = sklearn_pca.get_covariance()

    df_final = pd.DataFrame({'zip5': zip_codes, indexname: Y_sklearn[:, 0]})

    # Normalize data on a 0 to 1 scale
    #zip5_final = df_final['zip5'].values
    #minmax_scale = preprocessing.MinMaxScaler().fit(df_final[[indexname]])
    #minmax = minmax_scale.transform(df_final[[indexname]])
    #df_minmax = pd.DataFrame({'zip5':zip5_final,indexname:minmax[:,0]})

    return df_final
Developer: DistrictDataLabs | Project: 03-censusables | Lines: 26 | Source: Model.py

Example 7: kmeans

def kmeans():

    yeast_t = 7  # number of k-means iterations
    yeast_k = 6  # number of clusters

    # load the expression data (614 genes x 7 time points)
    yeastData = np.empty([614, 7], dtype=float)
    with open('YeastGene.csv', newline='') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastData[i] = row
            i += 1
    #print(yeastData)

    # load the initial centroids
    yeastCentroid = np.empty([yeast_k, 7], dtype=float)
    with open('YeastGene_Initial_Centroids.csv', newline='') as yeastdata:
        yeastreader = csv.reader(yeastdata, delimiter=',')
        i = 0
        for row in yeastreader:
            yeastCentroid[i] = row
            i += 1
    #print(yeastCentroid)

    # Lloyd's algorithm: assign each gene to its nearest centroid,
    # then recompute each centroid as the mean of its cluster
    for t in range(yeast_t):
        yeast_c = [[] for i in range(yeast_k)]
        minCentroid = []
        for arr in yeastData:
            for cen in yeastCentroid:
                minCentroid.append(np.linalg.norm(arr - cen))
            yeast_c[minCentroid.index(min(minCentroid))].append(arr)
            minCentroid = []

        for k in range(yeast_k):
            yeastCentroid[k] = [float(sum(l)) / len(l) for l in zip(*yeast_c[k])]
    #print("The new yeast Centroid values")
    #print(yeastCentroid)

    # print the cluster sizes
    print(len(yeast_c[0]), len(yeast_c[1]), len(yeast_c[2]),
          len(yeast_c[3]), len(yeast_c[4]), len(yeast_c[5]))

    # stack the clusters into one array, in cluster order
    clusters = np.zeros([614, 7], dtype=float)
    prev_len = 0
    for i in range(6):
        for j in range(len(yeast_c[i])):
            clusters[prev_len] = yeast_c[i][j]
            prev_len += 1

    # project to 2D with PCA and plot each cluster slice
    # (slice boundaries are hard-coded to the cluster sizes of the expected run)
    sklearn_pca = sklearnPCA(n_components=2)
    transf = sklearn_pca.fit_transform(clusters)
    plt.plot(transf[0:140, 0], transf[0:140, 1], '*', markersize=7, color='blue', alpha=0.5, label='cluster 1')
    plt.plot(transf[140:191, 0], transf[140:191, 1], '*', markersize=7, color='red', alpha=0.5, label='cluster 2')
    plt.plot(transf[191:355, 0], transf[191:355, 1], '*', markersize=7, color='green', alpha=0.5, label='cluster 3')
    plt.plot(transf[355:376, 0], transf[355:376, 1], '*', markersize=7, color='indigo', alpha=0.5, label='cluster 4')
    plt.plot(transf[376:538, 0], transf[376:538, 1], '*', markersize=7, color='yellow', alpha=0.5, label='cluster 5')
    plt.plot(transf[538:614, 0], transf[538:614, 1], '*', markersize=7, color='black', alpha=0.5, label='cluster 6')
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])
    plt.legend()
    plt.title("Kmeans")
    plt.show()
Developer: tsmanikandan | Project: CSE469-3 | Lines: 59 | Source: cluster.py
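
The example above implements k-means by hand with a fixed iteration count. For comparison, a sketch of the same cluster-then-project idea using scikit-learn's own KMeans instead of the manual loop (assumes the same two CSV files as above):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA as sklearnPCA

data = np.loadtxt('YeastGene.csv', delimiter=',')
init = np.loadtxt('YeastGene_Initial_Centroids.csv', delimiter=',')

# start from the given centroids; cap iterations at 7 to mirror yeast_t
km = KMeans(n_clusters=6, init=init, n_init=1, max_iter=7)
labels = km.fit_predict(data)

# project to 2D and colour points by cluster label
coords = sklearnPCA(n_components=2).fit_transform(data)
plt.scatter(coords[:, 0], coords[:, 1], c=labels, alpha=0.5)
plt.title("KMeans (sklearn)")
plt.show()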

Example 8: dimensionalityReduction

def dimensionalityReduction(self, nr=5):
    '''Apply all the dimensionality reduction techniques available in this class.
    Techniques available:
                        'PCA'
                        'FactorAnalysis'
                        'KPCArbf', 'KPCApoly'
                        'KPCAcosine', 'KPCAsigmoid'
                        'IPCA'
                        'FastICADeflation'
                        'FastICAParallel'
                        'Isomap'
                        'LLE'
                        'LLEmodified'
                        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)
    fa = FactorAnalysis(n_components=nr)
    factors = fa.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='rbf')
    rbf = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='poly')
    poly = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='cosine')
    cosine = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='sigmoid')
    sigmoid = kpca.fit_transform(dataset)
    ipca = IncrementalPCA(nr)
    i_components = ipca.fit_transform(dataset)
    fip = FastICA(nr, algorithm='parallel')
    fid = FastICA(nr, algorithm='deflation')
    ficaD = fid.fit_transform(dataset)  # deflation result
    ficaP = fip.fit_transform(dataset)  # parallel result
    '''isomap=Isomap(n_components=nr).fit_transform(dataset)
    try:
        lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
    except ValueError:
        lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
    try:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
    except ValueError:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset)
    try:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
    except ValueError:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
    values = [p_components, factors, rbf, poly, cosine, sigmoid, i_components, ficaD, ficaP]  # ,isomap,lle1,lle2,lle3]
    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine', 'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']  # ,'Isomap','LLE','LLEmodified','LLEltsa']
    self.ModelInputs.update(dict(zip(keys, values)))
    [self.datasetsAvailable.append(key) for key in keys]

    #debug
    #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    #dataset['Output']=self.ModelOutput
    #self.debug['Dimensionalityreduction']=dataset
    ###
    return self
Developer: UIUC-SULLIVAN | Project: ThesisProject_Andrea_Mattera | Lines: 59 | Source: Classes.py

Example 9: pcaDecomp

def pcaDecomp(data, normalize=True):
    # optionally standardize before PCA
    if normalize:
        data = StandardScaler().fit_transform(data)

    # project onto the first two principal components
    pca = sklearnPCA(n_components=2)
    decomp = pca.fit_transform(data)
    # plt.scatter(data[:,0], data[:,1])
    # plt.show()
    histo2d(decomp, ranged=False)  # histo2d is a project-local plotting helper
Developer: mattyhk | Project: soccer-meng | Lines: 9 | Source: Clustering.py
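
histo2d is not reproduced in this snippet; a minimal stand-in using matplotlib, purely a guess at the original helper's behaviour (the ranged flag is kept for signature compatibility but ignored here):

import matplotlib.pyplot as plt

def histo2d(decomp, ranged=False):
    # 2-D histogram of the two PCA components; speculative stand-in,
    # not the soccer-meng project's actual code
    plt.hist2d(decomp[:, 0], decomp[:, 1], bins=50)
    plt.colorbar()
    plt.show()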

Example 10: apply_pca

def apply_pca(data):
    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(data)

    from sklearn.decomposition import PCA as sklearnPCA
    sklearn_pca = sklearnPCA(n_components=2)
    Y_sklearn = sklearn_pca.fit_transform(X_std)

    return Y_sklearn
Developer: rudolfsberzins | Project: Various_code_examples | Lines: 9 | Source: DAM_apply_pca.py
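
Since apply_pca is fully self-contained, it can be exercised directly on synthetic data:

import numpy as np

X = np.random.rand(50, 10)  # 50 samples, 10 features
Y = apply_pca(X)            # standardized, then projected to 2 components
print(Y.shape)              # (50, 2)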

Example 11: pca

    def pca(self, samples):
        '''
        Apply PCA from sklearn.
        '''
        sklearn_pca = sklearnPCA(n_components=2)
        # Fit the model with samples
        fit = sklearn_pca.fit(samples)
        # Apply the dimensionality reduction on samples
        # (fit followed by transform is equivalent to fit_transform)
        pca = fit.transform(samples)
        return pca
Developer: GabiThume | Project: msc-src | Lines: 10 | Source: plot-1.0.py

Example 12: pca_json

def pca_json(df, n_components=4, exp_var_min=0.05):
    # samples are columns of df, so transpose before fitting
    sklearn_pca = sklearnPCA(n_components=n_components)
    pca_points = sklearn_pca.fit_transform(df.T)
    # pc_to_keep, trim_pc and append_exp_var are project-local helpers
    exp_var, num_pc = pc_to_keep(sklearn_pca.explained_variance_ratio_,
                                 exp_var_min)
    pca_points_df = trim_pc(pca_points, num_pc)
    pca_points_df['sample'] = df.columns.values
    pca_points_df = append_exp_var(pc_df=pca_points_df,
                                   exp_var_list=exp_var,
                                   num_pc=num_pc)
    return pca_points_df
Developer: JakeHagen | Project: gene_expression_norm | Lines: 11 | Source: gene_expression.py
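
The helpers pc_to_keep, trim_pc and append_exp_var are not reproduced here. Judging only by the call site, pc_to_keep apparently keeps the components whose explained-variance ratio reaches exp_var_min; a speculative sketch of that contract, not the project's actual code:

def pc_to_keep(exp_var_ratios, exp_var_min):
    # keep the leading components explaining at least exp_var_min each
    # (speculative reconstruction based on the call site above)
    kept = [r for r in exp_var_ratios if r >= exp_var_min]
    return kept, len(kept)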

Example 13: plotGraph

def plotGraph(samples, n_samples, tags, dimensions):

    colours = ['blue', 'red', 'green', 'yellow', 'black']
    n_tags = len(tags)

    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        for i in range(n_tags):
            plt.plot(sklearn_transf[i*n_samples:(i+1)*n_samples,0],sklearn_transf[i*n_samples:(i+1)*n_samples,1],\
                 'o', markersize=7, color=colours[i], alpha=0.5, label=tags[i])

        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
    #     plt.xlim([-4,4])
    #     plt.ylim([-4,4])
        plt.legend()
        plt.title('PCA')

    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        fig = plt.figure(figsize=(8,8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10

        for i in range(n_tags):
            ax.plot(sklearn_transf[i*n_samples:(i+1)*n_samples,0], sklearn_transf[i*n_samples:(i+1)*n_samples,1],\
                sklearn_transf[i*n_samples:(i+1)*n_samples,2], 'o', markersize=8, color=colours[i], alpha=0.5, label=tags[i])

        plt.title('PCA')
        ax.legend(loc='upper right')

    # plt.savefig("%s.png" % (dimensions), bbox_inches='tight',dpi=200)
    plt.show()
    # plt.close()

    return True
Developer: mrmutator | Project: COP_Project | Lines: 40 | Source: plot.py
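
A quick way to exercise plotGraph: stack n_samples rows per tag so that the slice i*n_samples:(i+1)*n_samples picks out one class (synthetic data; the function above and matplotlib assumed in scope):

import numpy as np

n_samples = 30
tags = ['class A', 'class B', 'class C']
# three Gaussian blobs, stacked in tag order
samples = np.vstack([np.random.randn(n_samples, 5) + 3 * i
                     for i in range(len(tags))])
plotGraph(samples, n_samples, tags, '2D')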

Example 14: plotGraph

def plotGraph(samples, word, dimensions):
    if dimensions == '2D':
        sklearn_pca = sklearnPCA(n_components=2)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        plt.plot(sklearn_transf[:,0],sklearn_transf[:,1],\
             'o', markersize=7, color='blue', alpha=0.5, label='')
        # plt.plot(sklearn_transf[1::2,0], sklearn_transf[1::2,1],\
        #      '^', markersize=7, color='red', alpha=0.5, label='Matrix')

        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
    #     plt.xlim([-4,4])
        plt.ylim([-.8,.8])
        plt.legend()
        plt.title('Word embeddings PCA')

        print(sklearn_transf)

    elif dimensions == '3D':
        sklearn_pca = sklearnPCA(n_components=3)
        sklearn_transf = sklearn_pca.fit_transform(samples)

        fig = plt.figure(figsize=(8,8))
        ax = fig.add_subplot(111, projection='3d')
        plt.rcParams['legend.fontsize'] = 10
        ax.plot(sklearn_transf[:,0], sklearn_transf[:,1],\
            sklearn_transf[:,2], 'o', markersize=8, color='blue', alpha=0.5, label='')
        # ax.plot(sklearn_transf[:,0], sklearn_transf[:,1],\
        #     sklearn_transf[:,2], '^', markersize=8, alpha=0.5, color='red', label='Matrix')

        plt.title('Word embeddings PCA')
        ax.legend(loc='upper right')

        print(sklearn_transf)

    plt.savefig("%s-%s.png" % (word, dimensions), bbox_inches='tight', dpi=200)
    plt.close()

    return True
Developer: KatGarmash | Project: semantic_compound_splitting | Lines: 40 | Source: pca.py

Example 15: Seleccion_Ratios

def Seleccion_Ratios(df):

    import numpy as np
    import pandas as pd

    from sklearn.decomposition import PCA as sklearnPCA
    from sklearn.preprocessing import StandardScaler
    from sklearn.ensemble import RandomForestClassifier

    # Drop the id and target columns before computing the PCAs
    df.columns = [x.lower() for x in df.columns]
    objetivo = [col for col in df.columns if 'target' in col]
    objetivo = ''.join(objetivo)

    borrar = ['id', objetivo]
    dfaux = df.drop(borrar, axis=1)

    ListaColumnas = dfaux.columns
    tamDf = len(dfaux.columns)
    X_std = StandardScaler().fit_transform(dfaux.values)
    pca = sklearnPCA(n_components=tamDf).fit_transform(X_std)
    columnas_pca = []

    # one name per principal component, i.e. pca.shape[1] columns
    for i in range(pca.shape[1]):
        v = "VAR_PCA_" + str(i)
        columnas_pca.append(v)

    df1 = pd.DataFrame(X_std, columns=ListaColumnas)
    df2 = pd.DataFrame(pca, columns=columnas_pca)

    df_PCA = pd.concat([df1, df2], axis=1)

    y = df[objetivo]

    # Rank the standardized ratios and PCA features with a random forest
    forest = RandomForestClassifier(n_estimators=250, random_state=0)
    forest.fit(df_PCA, y)
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the top-30 ranking
    print("TOP 30:")

    for f in range(30):
        print("%d. Ratio %s (%f) " % (f + 1, df_PCA.columns[indices[f]], importances[indices[f]]))
Developer: ciffcesarhernandez | Project: practica3 | Lines: 52 | Source: RatioSelection.py
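
The std array computed above from the per-tree importances is not used in the printed ranking; it is conventionally fed into an error-bar plot of the importances. A sketch of that final step, assuming the variables importances, std, indices and df_PCA from the function body are in scope (e.g. if this code were appended inside the function):

import matplotlib.pyplot as plt

top = indices[:30]
plt.bar(range(len(top)), importances[top], yerr=std[top])
plt.xticks(range(len(top)), df_PCA.columns[top], rotation=90)
plt.title("Feature importances (top 30)")
plt.tight_layout()
plt.show()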


Note: the sklearn.decomposition.sklearnPCA examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from projects contributed by various developers; copyright remains with the original authors. Consult each project's license before using or distributing the code, and do not repost without permission.