

Python PCA.get_covariance Method Code Examples

This article collects typical usage examples of the Python method sklearn.decomposition.PCA.get_covariance. If you have been wondering what PCA.get_covariance does, how to call it, or where to find working examples, the curated snippets below should help. You can also browse the broader set of usage examples for sklearn.decomposition.PCA.


The following presents 14 code examples of the PCA.get_covariance method, ordered by popularity.
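
As a quick orientation before the collected examples, here is a minimal sketch (ours, not drawn from any of the projects below) of what get_covariance returns: PCA models the data covariance as components_.T @ diag(explained_variance_ - noise_variance_) @ components_ with noise_variance_ added back on the diagonal, and when all components are kept this reproduces the empirical covariance of the training data.

# Minimal sketch: with all components kept, get_covariance() matches the
# empirical (sample) covariance of the training data.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data
pca = PCA().fit(X)                        # keep all 4 components
model_cov = pca.get_covariance()          # model-based covariance estimate
sample_cov = np.cov(X, rowvar=False)      # empirical covariance (ddof=1)
print(np.allclose(model_cov, sample_cov))  # True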

Example 1: test_pca

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def test_pca():
    """PCA on dense arrays"""
    pca = PCA(n_components=2)
    X = iris.data
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    pca = PCA()
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)

    X_r = pca.transform(X)
    X_r2 = pca.fit_transform(X)

    assert_array_almost_equal(X_r, X_r2)

    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        pca.n_components = n_components
        pca.fit(X)
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
Developer: Garrett-R, Project: scikit-learn, Lines: 29, Source: test_pca.py
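
The final assertion in Example 1 holds because get_precision() returns the inverse of the matrix from get_covariance() (computed via the matrix inversion lemma rather than a direct inverse), so their product is the identity. A quick hedged check of that relationship:

# Sketch: get_precision() agrees with directly inverting get_covariance().
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

pca = PCA(n_components=2).fit(load_iris().data)
cov = pca.get_covariance()
precision = pca.get_precision()
print(np.allclose(precision, np.linalg.inv(cov)))  # True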

Example 2: test_pca_randomized_solver

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def test_pca_randomized_solver():
    # PCA on dense arrays
    X = iris.data

    # Loop excluding the 0, invalid for randomized
    for n_comp in np.arange(1, X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='randomized', random_state=0)

        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)

        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        X_r = pca.transform(X)
        assert_array_almost_equal(X_r, X_r2)

        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)

    pca = PCA(n_components=0, svd_solver='randomized', random_state=0)
    assert_raises(ValueError, pca.fit, X)
    # Check internal state
    assert_equal(pca.n_components,
                 PCA(n_components=0,
                     svd_solver='randomized', random_state=0).n_components)
    assert_equal(pca.svd_solver,
                 PCA(n_components=0,
                     svd_solver='randomized', random_state=0).svd_solver)
Developer: amueller, Project: scikit-learn, Lines: 37, Source: test_pca.py

Example 3: test_pca

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def test_pca():
    # PCA on dense arrays
    X = iris.data

    for n_comp in np.arange(X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='full')

        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)

        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        X_r = pca.transform(X)
        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)

    # test explained_variance_ratio_ == 1 with all components
    pca = PCA(svd_solver='full')
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
Developer: amueller, Project: scikit-learn, Lines: 29, Source: test_pca.py

Example 4: gtm_pc_initialization

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
    def gtm_pc_initialization(self):
        """ Calculation of weight matrix using principal components
        :return: w: Initialized weight matrix
        :return: beta: Initial scalar value of the inverse variance common to all components of the mixture
        """
        # Calculation of principal components and their explained variance
        pca = PCA()
        pca.fit(self.centered_input_data)
        # Eigenvectors scaled by their respective eigenvalues
        [eigenvalues, eigenvector] = np.linalg.eig(pca.get_covariance())
        idx = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[idx]
        eigenvector = eigenvector[:, idx]
        eigenvector_scaled = np.dot(eigenvector[:, 0:self.z.shape[0]], np.diag(np.sqrt(eigenvalues
                                                                                       [0:self.z.shape[0]])))
        # Normalized latent distribution and weight matrix initialization
        z_norm = np.dot(np.diag(1/np.std(self.z, axis=1)), self.z - np.dot(np.diag(np.mean(self.z, axis=1)),
                                                                           np.ones(self.z.shape)))
        # eigenvector_scaled[:, 1] = - eigenvector_scaled[:, 1]
        lhs = self.fi
        rhs = np.dot(np.transpose(z_norm), np.transpose(eigenvector_scaled))
        w = np.linalg.lstsq(lhs, rhs)[0]
        w[-1, :] = np.mean(self.centered_input_data, 0)
        rhs2 = np.linalg.pinv(rhs)
        w2 = np.dot(np.transpose(lhs), np.transpose(np.linalg.pinv(rhs)))

        # Beta initialization
        beta_matrix = np.dot(self.fi, w)
        inter_distance = cdist(beta_matrix, beta_matrix, 'sqeuclidean')
        np.fill_diagonal(inter_distance, np.inf)
        mean_nearest_neighbor = np.mean(np.min(inter_distance, axis=1))  # mean nearest-neighbour distance per point
        beta = 2 / mean_nearest_neighbor
        if self.z.shape[0] < self.input_data.shape[1]:
            beta = min(beta, 1 / pca.explained_variance_[self.z.shape[0]])
        return w, beta
Developer: mattoescobar, Project: Machine-Learning, Lines: 37, Source: GTM.py
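
Example 4 eigendecomposes get_covariance() to recover principal axes and their variances. Since this PCA keeps all components, the same quantities are available directly as pca.components_ and pca.explained_variance_; a hedged sketch of the equivalence, using eigh (the appropriate routine for a symmetric matrix, which also avoids eig's complex-dtype output):

# Sketch: eigendecomposing get_covariance() recovers the fitted PCA axes.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data
pca = PCA().fit(X)
eigenvalues, eigenvectors = np.linalg.eigh(pca.get_covariance())
order = np.argsort(eigenvalues)[::-1]                 # descending variance
print(np.allclose(eigenvalues[order], pca.explained_variance_))  # True
# Each eigenvector matches the corresponding row of components_ up to sign:
cosines = np.abs(np.sum(eigenvectors[:, order].T * pca.components_, axis=1))
print(np.allclose(cosines, 1.0))                      # True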

Example 5: load_data

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def load_data():

    # load the CSV file as a numpy matrix
    dataset = np.loadtxt(csv_filename, delimiter=';', skiprows=1)

    # separate the data from the target attributes
    X = dataset[:,:11]
    # reduce dimension of X with pca
    pca = PCA(n_components=2)
    pca.fit(X)
    print "Matriz de covariancia sin eliminar atributos"
    print pca.get_covariance()

    X = dataset[:, [0,1,2,3,4,7,8,9,10]]
    # reduce dimension of X with pca
    pca = PCA(n_components=2)
    pca.fit(X)
    print "Matriz de covariancia eliminando atributos 5 y 6"
    print pca.get_covariance()

    X = dataset[:, [1,2,3,4,7,8,9,10]]
    # reduce dimension of X with pca
    pca = PCA(n_components=2)
    pca.fit(X)
    print "Matriz de covariancia eliminando atributos 0, 5 y 6"
    print pca.get_covariance()

    X = pca.transform(X)
    y = dataset[:,11]
    return X, y
Developer: leanahabedian, Project: TPAA, Lines: 32, Source: tp_part2.py

Example 6: compute_PCA

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def compute_PCA(array):

    nimages0, nchannels0, height0, width0 = array.shape
    rolled = np.transpose(array, (0, 2, 3, 1))
    # transpose from N x channels x height x width  to  N x height x width x channels
    nimages1, height1, width1, nchannels1 = rolled.shape
    # check shapes
    assert nimages0 == nimages1
    assert nchannels0 == nchannels1
    assert height0 == height1
    assert width0 == width1
    # flatten
    reshaped = rolled.reshape(nimages1 * height1 * width1, nchannels1)
    
    from sklearn.decomposition import PCA
    
    pca = PCA()
    pca.fit(reshaped)
    
    cov = pca.get_covariance()
    
    eigenvalues, eigenvectors = np.linalg.eig(cov)
    
    return eigenvalues, eigenvectors
Developer: EdwardJKim, Project: dl4astro, Lines: 26, Source: train_cnn.py
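
The per-channel eigenpairs computed in Example 6 are typically consumed by PCA color augmentation ("fancy PCA", Krizhevsky et al., 2012), which perturbs every pixel along the channel eigenvectors in proportion to the eigenvalues. A hedged sketch of that assumed downstream step (the function name and sigma default are ours, not from the project):

# Sketch (assumed downstream use): PCA color augmentation from the eigenpairs.
import numpy as np

def fancy_pca_offset(eigenvalues, eigenvectors, sigma=0.1, rng=None):
    """Random per-image channel offset: sum_i alpha_i * lambda_i * v_i."""
    rng = np.random.default_rng() if rng is None else rng
    alphas = rng.normal(0.0, sigma, size=len(eigenvalues))
    return eigenvectors @ (alphas * eigenvalues)      # shape (nchannels,)

# offset = fancy_pca_offset(eigenvalues, eigenvectors)
# augmented = image + offset[np.newaxis, np.newaxis, :]  # broadcast over H x W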

Example 7: StandardScaler

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
print(y.head())

print(mushrooms.corr())

# scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X=scaler.fit_transform(X)
print(X)


from sklearn.decomposition import PCA
pca = PCA()
pca.fit_transform(X)

covariance=pca.get_covariance()

print(covariance, '********************')
explained_variance = pca.explained_variance_
print(explained_variance)

with plt.style.context('dark_background'):
    plt.figure(figsize=(6, 4))
    
    plt.bar(range(22), explained_variance, alpha=0.5, align='center',
            label='individual explained variance')
    plt.ylabel('Explained variance')  # absolute variances, not ratios
    plt.xlabel('Principal components')
    plt.legend(loc='best')
    plt.tight_layout()
Developer: fzhurd, Project: fzwork, Lines: 32, Source: predict_mushroom_v1b.py

Example 8: PCA

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
dataset_input = dataset[:,0:12]
names = np.array(['class1', 'class2', 'class3'])

b = {'data' : dataset_input, 'target' : dataset_output, 'target_names' : names}

X = b['data']
y = b['target']
target_names = b['target_names']

#X = np.array([[-1, -1, 0], [-2, -1, -1], [-3, -2, -2], [1, 1, 1], [2, 1, 0], [3, 2, 1]])

pca = PCA(n_components=10)
X1 = -scale(pca.fit_transform(X))
#print(pca.explained_variance_ratio_) 
print('covariance matrix')
print(pca.get_covariance())

#print X
print('X1')
print(X1)

#print X1[0:4,0]
#print X1[0:4,1]

Y = pca.inverse_transform(X1)

plt.scatter(X[:, 0], X[:, 1]) 
plt.show()

#plt.plot(X, y, 'o')
#plt.plot(x2, y + 0.5, 'o')
Developer: eldersantos, Project: gpam, Lines: 33, Source: pca.py

Example 9: fxData

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]

#......... part of the code omitted here .........
        return components, variance
    
    def plotPCA(self, periodicity = 'daily', fxRisk = None, style = 'dark_background'):
        components, variance = self.PCAFXAnalysis(periodicity = periodicity, fxRisk = fxRisk)
        n = float(len(variance))
        
        title = periodicity + ' PCA '
        if fxRisk: title += fxRisk
        
        with plt.style.context((style)):
            axes = components.plot(kind = 'barh', legend = False, figsize = [15,n*2.5], subplots = True, layout = [int(np.ceil(n/3)),3], title = title, sharex=False, style = 'winter')#, layout = [np.floor(n**0.5), np.floor(n**0.5)+3])
            for axe, v in zip(axes.flatten(),variance.values):
                axe.set_title(str(round(v[0]*100, 2)) + '%')
#            plt.gcf().autolayout = True
                
            if n <= 3:
                top = 0.9 
            else:
                top = 0.95    
                
            plt.subplots_adjust(left=None, bottom=None, right=None, top=top, wspace=None, hspace=None)
#            plt.tight_layout()
#        andrews_curves(components, 1)
    
    def plotCurrencyBasketIndex(self, periodicity = 'daily', fxRisk = None, style = 'dark_background'):
        currencyBasket = (1+self.getCurrencyBasketFromDB(periodicity = periodicity, fxRisk = fxRisk)).cumprod()
        n = float(len(currencyBasket.columns))
        
        title = 'Return Index '
        if fxRisk: title += fxRisk
        
        with plt.style.context((style)):
            axes = currencyBasket.plot( figsize = [18,n*1.1], subplots = True, layout = [int(np.ceil(n/3)),3], xticks = currencyBasket.index[::5], title =  title, sharex=False, style = 'g.--', rot = 45)
            
#            axes = currencyBasket.plot(subplots = True)
            for axe, v in zip(axes.flatten(),currencyBasket.columns):
                axe.legend([v])
#                axe.set_title(v)
                
                
            plt.gcf().autolayout = True
#            axes.tight_layout()   
            
#            plt.tight_layout()
            
    def getCorrelationVariance(self, periodicity = 'daily', fxRisk = None, numeraire = 'None'):
        
        if numeraire== 'USD':
            currencyBasket = self.getAllCurrencyUSDReturnData(periodicity = periodicity, fxRisk = fxRisk)
        else:    
            currencyBasket = self.getCurrencyBasketFromDB(periodicity = periodicity, fxRisk = fxRisk)
        
            
        self.pca = PCA()
        self.pca.fit_transform(currencyBasket.fillna(0))
        covariance = self.pca.get_covariance()
        variance = np.sqrt(np.diag(covariance))*np.eye(len(covariance))
        varianceInv = np.linalg.inv(variance)
        corr = np.dot(covariance,varianceInv)
        corr = np.dot(varianceInv,corr)
        corrDF = pd.DataFrame(corr, columns = currencyBasket.columns, index = currencyBasket.columns)
        
        varDF = pd.Series(np.sqrt(np.diag(covariance)), index = currencyBasket.columns)
        return corrDF, varDF
        
    def plotCorrelationMatrix(self, periodicity = 'daily', fxRisk = None, numeraire = 'None', style = 'dark_background'):
        
        corr, variance = self.getCorrelationVariance(periodicity = periodicity, fxRisk = fxRisk, numeraire = numeraire)
        df = corr
        
        with plt.style.context((style)):
            plt.figure(figsize = (15,10))
            plt.pcolor(df, cmap='coolwarm', vmin = -1, vmax = 1)
            
            for (i, j), z in np.ndenumerate(corr.values):
                plt.text(j+0.5, i+0.5, '{:0.2f}'.format(z), ha='center', va='center')
                
            plt.yticks(np.arange(0.5, len(df.index), 1), df.columns)
            plt.xticks(np.arange(0.5, len(df.index), 1), df.columns, rotation = 45)
            
            ax = plt.gca()
            ax.invert_xaxis()
            ax.xaxis.tick_top()
        
            plt.yticks(np.arange(0.5, len(df.index), 1), df.columns)
            plt.xticks(np.arange(0.5, len(df.index), 1), df.columns, rotation = 45)
            plt.colorbar()
            plt.show()
            
    def risk_type(self):
        # Function that returns the fxRisk types stored in the DB
        with self.openConnection() as con:
            cur = con.cursor()
            cur.execute("""SELECT DISTINCT FXRISK FROM fxIdentifierData""")
            fxRisk = cur.fetchall()
            
        if fxRisk:
            return [str(r[0]) for r in fxRisk]
            
        return False
Developer: goldbena, Project: QuantAnalysis, Lines: 104, Source: FXClas.py
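
getCorrelationVariance above converts the covariance matrix from get_covariance() to a correlation matrix as D⁻¹CD⁻¹, forming and inverting a diagonal matrix of standard deviations. The same conversion can be written more directly with elementwise division; a hedged sketch:

# Sketch: covariance-to-correlation without building an explicit D^-1 matrix.
import numpy as np

def covariance_to_correlation(cov):
    std = np.sqrt(np.diag(cov))
    return cov / np.outer(std, std)   # corr[i, j] = cov[i, j] / (std_i * std_j)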

Example 10: get_iris

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
from sklearn.decomposition import PCA
from dataset import *
import matplotlib.pyplot as plt
import matplotlib.cm as cmx
import matplotlib.colors as colors
from cache import StorageCache

dataset = get_iris()
# print('bandwidth:', dataset.get_bandwidth(force=True))

pca = PCA(n_components=2)
pca.fit(dataset.X)
X = pca.transform(dataset.X)
Y = dataset.Y
print('covariance:', pca.get_covariance())

def get_cmap(N):
    '''Returns a function that maps each index in 0, 1, ... N-1 to a distinct
        RGB color.'''
    color_norm = colors.Normalize(vmin=0, vmax=N - 1)
    scalar_map = cmx.ScalarMappable(norm=color_norm, cmap='hsv')

    def map_index_to_rgb_color(index):
        return scalar_map.to_rgba(index)

    def static(index):
        return 'rgb'[index]

    return map_index_to_rgb_color
    # return static
Developer: phizaz, Project: seeding-strategy-ssl, Lines: 32, Source: test_pca.py

Example 11: map

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
        this_row = row[:-1]
        this_row = list(map(float, this_row))  # map() is lazy in Python 3
        X.append(this_row)
        y.append(row[-1])
        if i > SAMPLE_SIZE:
            break
    X = np.array(X)
    data = X
    n_digits = 2

    print('PCA')
    pca_start = time()
    pca = PCA(n_components=n_digits).fit(data)

    if DRAW_EIGEN_VALUE_PLOT:
        eig_val_cov, eig_vec_cov = np.linalg.eig(pca.get_covariance())
        eig_val_cov.sort()
        eig_val_cov = eig_val_cov[::-1]
        x_axis = [i for i in range(len(eig_val_cov))]
        plt.plot(x_axis, eig_val_cov, 'ro-', linewidth=2)
        plt.title('Eigen Value vs Component')
        plt.xlabel('Component')
        plt.ylabel('Eigen Value')
        #I don't like the default legend so I typically make mine like below, e.g.
        #with smaller fonts and a bit transparent so I do not cover up data, and make
        #it moveable by the viewer in case upper-right is a bad place for it 
        leg = plt.legend(['Eigenvalues from PCA'], loc='best', borderpad=0.3, 
                         shadow=False, prop=matplotlib.font_manager.FontProperties(size='small'),
                         markerscale=0.4)
        leg.get_frame().set_alpha(0.4)
        leg.draggable(state=True)
Developer: mukundvis, Project: Gatech-ML-Assignments, Lines: 33, Source: run.py

Example 12: main

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
def main():
    
     data = pd.read_csv("mushrooms.csv")
#==============================================================================
#     print(data.head(6))
#     print("================================================")
#     print(data.isnull().sum())
#     print("=====================")
#     print(data['class'].unique())
#     print("=====================")
#     print(data.shape)
#==============================================================================
    
     labelencoder = LabelEncoder()
     for col in data.columns:
         data[col] = labelencoder.fit_transform(data[col])
        
    
 
     #print(data.head())
    
#==============================================================================
#      ax = sns.boxplot(x='class', y='stalk-color-above-ring',  data=data)
#      ax = sns.stripplot(x="class", y='stalk-color-above-ring',
#                    data=data, jitter=True,
#                    edgecolor="gray")
#      sns.plt.title("Class w.r.t stalkcolor above ring",fontsize=12)
#==============================================================================
    

     train_feature = data.iloc[:,1:23]
     test_feature = data.iloc[:, 0]
     
   #Heatmap  
#==============================================================================
#     data = pd.DataFrame(train_feature)
#     corrResult = data.corr()
#     sns.heatmap(corrResult)
#     plt.show()
#==============================================================================

#==============================================================================
#      # Build a classification task using 3 informative features
#      train_feature, test_feature = make_classification(n_samples=1000,
#                                 n_features=10,
#                                 n_informative=3,
#                                 n_redundant=0,
#                                 n_repeated=0,
#                                 n_classes=2,
#                                 random_state=0,
#                                 shuffle=False)
#      # Build a forest and compute the feature importance
#      forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
#      forest.fit(train_feature, test_feature)
#      importances = forest.feature_importances_
#      for index in range(len(train_feature[0])):
#          print ("Importance of feature ", index, "is", importances[index])
#==============================================================================
     
     # Scale the data to be between -1 and 1
     scaler = StandardScaler()
     train_feature = scaler.fit_transform(train_feature)
     
     pca = PCA()
     pca.fit_transform(train_feature)
     covariance = pca.get_covariance()
     explained_variance=pca.explained_variance_
     print(explained_variance)
      
     
     # Splitting the data into training and testing dataset
     X_train, X_test, y_train, y_test = train_test_split(train_feature,test_feature,test_size=0.2,random_state=4)
     
     print("==============================================================")
     print("                     Logistic Regression                      ")
     print("==============================================================")
     
     # Logistic Regression
     logic = LogisticRegression()
     parameters_logic = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] ,
               'penalty':['l1','l2']
                    }
     logic_grid_search = GridSearchCV(logic, parameters_logic,cv=10)
     logic_grid_search.fit(X_train,y_train)
     
     # Positive class prediction probabilities
     y_prob = logic_grid_search.predict_proba(X_test)[:,1]   
     # Threshold the probabilities to give class predictions.
     y_pred = np.where(y_prob > 0.5, 1, 0)
     
     print("Logic Regresion result: ",logic_grid_search.score(X_test, y_pred),"%")
     print("Best parameters for this model are: ",logic_grid_search.best_params_)
     
     print("==============================================================")
     print("                        Naive Bayes                           ")
     print("==============================================================")
     
     # Gaussian Naive Bayes
     naive = GaussianNB()
     naive.fit(X_train, y_train)
#......... part of the code omitted here .........
Developer: NicuVlasin, Project: Machine-Learning, Lines: 103, Source: Assignment_2.py

Example 13: PCA

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
pca = PCA(whiten=False)
data = win.getData()
# win.shuffleIt(data, 2)
# win.shuffleIt(data, 1)
pca.fit(data)
'''plot'''
# expl_var_cumultative = [x / max(list(accumulate(pca.explained_variance_))) for x in list(accumulate(pca.explained_variance_))]
# plt.plot(range(len(pca.explained_variance_)), expl_var_cumultative,
# 'bo-', label="real")
ndims = range(1, len(pca.explained_variance_ratio_)+1)
expl_var_cumultative = list(accumulate(pca.explained_variance_ratio_))
plt.plot(ndims, expl_var_cumultative, 'r*', label='real - after PCA', markersize=10)
#
var = []
for x in range(27):
    var.append(pca.get_covariance()[x][x])
var_r = var / sum(var)
var_r_ac = list(accumulate(sorted(var_r, reverse=True)))
plt.plot(ndims, var_r_ac, 'yo', label='real - before PCA', markersize=10)
#
# lbls = win.getCoursesNames()
# cdict = {lbls[ii]: v for ii, v in enumerate(var_r)}
# for key, value in sorted(cdict.items(), key=lambda it: it[1], reverse=True):
#     print(key, value)
#
# print('\nCOMPZ:')
# compz = pca.components_[0, :]
# cdict_pca = {lbls[ii]: -v for ii, v in enumerate(compz)}
# for key, val in sorted(cdict_pca.items(), key=lambda it: it[1], reverse=True):
#     print(key, ": ", val)
Developer: l-liciniuslucullus, Project: strident-octo-spork, Lines: 32, Source: pca.py
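
Example 13 contrasts two cumulative curves: the sorted per-feature variances read off the diagonal of get_covariance() ("before PCA") and explained_variance_ratio_ ("after PCA"). The PCA curve always lies on or above the other, since the top-k eigenvalues of a covariance matrix dominate its top-k diagonal entries while both sum to the trace. A hedged check of that ordering:

# Sketch: the post-PCA cumulative variance curve dominates the pre-PCA one.
import numpy as np
from itertools import accumulate
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

pca = PCA().fit(load_iris().data)
per_feature = np.sort(np.diag(pca.get_covariance()))[::-1]
before = list(accumulate(per_feature / per_feature.sum()))
after = list(accumulate(pca.explained_variance_ratio_))
print(all(a >= b - 1e-12 for a, b in zip(after, before)))  # True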

Example 14: fit

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import get_covariance [as alias]
 def fit(self, L, U, max_it=1000, p1='euclidean',p2='mahalanobis',pool_size=100):
     metrics = [p1,p2]
     # Initialize Training Sets
     L1 = Data(np.copy(L.X), np.copy(L.y))
     L2 = Data(np.copy(L.X), np.copy(L.y))
     Ls = [L1, L2]
     # Select pool of unlabeled data
     Upool_indexs = np.random.choice(len(U), pool_size, replace=False)
     Upool = [U[i] for i in Upool_indexs]
     
     
     # Create the two kNN regressors
     kNNs = []
     for m in metrics:
         r = None
         if m == 'mahalanobis':
             pca = PCA()
             pca.fit(L.X)
             v = pca.get_covariance()
             r = KNeighborsRegressor(n_neighbors=self.k,metric=m, V=v)
         else:
             r = KNeighborsRegressor(n_neighbors=self.k,metric=m)
         kNNs.append(r)
     # train regressors on both sets
     for i in [0,1]:
         kNNs[i].fit(Ls[i].X, Ls[i].y)
     
     # repeat for max_it rounds
     for i in range(max_it):
         print(i)
         # keep list of changes to Ls
         pi = [[],[]]
         # for each training and regressor set
         for j in [0,1]:
             #print j
             Upool_ys = kNNs[j].predict(Upool)
             # get the neighbors of each unlabeled point - as indexs of the orig lists
             Upool_ns = kNNs[j].kneighbors(Upool, return_distance=False)
             
             deltas = []
             for r in range(len(Upool)):
                 Lj_alt = Union(Ls[j], Upool[r], Upool_ys[r])
                 alt_kNN = None
                 m = metrics[j]
                 if m == 'mahalanobis':
                     pca.fit(Lj_alt.X)
                     v = pca.get_covariance()
                     alt_kNN = KNeighborsRegressor(n_neighbors=self.k,metric=m, V=v)
                 else:
                     alt_kNN = KNeighborsRegressor(n_neighbors=self.k,metric=m)
                 alt_kNN.fit(Lj_alt.X, Lj_alt.y)
                 
                 neighbors_indexs = Upool_ns[r]
                 neighbors = [Ls[j].X[n] for n in neighbors_indexs]
                 
                 kNN_n_ys = kNNs[j].predict(neighbors)
                 altkNN_n_ys = alt_kNN.predict(neighbors)
                 real_n_ys = [Ls[j].y[n] for n in neighbors_indexs]
                 delta = 0
                 for n in range(self.k):
                     orig_diff = real_n_ys[n] - kNN_n_ys[n]
                     alt_diff = real_n_ys[n] - altkNN_n_ys[n]
                     delta += orig_diff**2 - alt_diff**2
                 deltas.append(delta)
                 
             sorted_ds = sorted(deltas)[::-1]
             if sorted_ds[0] > 0:
                 highest = sorted_ds[0]
                 index = deltas.index(highest)
                 xj = Upool[index]
                 yj = Upool_ys[index]
                 
                 pi[j] = [(xj,yj)]
                 
                 uIndex = U.tolist().index(xj.tolist())
                 U = np.delete(U, uIndex, axis=0)  # np.delete returns a copy; reassign to actually drop the row
         
         newLs = Ls
         replenishCount = 0
         for i in [0,1]:
             for px,py in pi[1-i]:
                 replenishCount += 1
                 newLs[i] = Union(newLs[i],px,py)
         # if no changes need to be made, we have converged 
         empty = True
         for a in pi:
             if a:
                 empty = False
         
         if empty:
             break
         
         # else make changes, retrain, and replenish the untrained pool
         Ls = newLs
         for i in [0,1]:
             kNNs[i].fit(Ls[i].X, Ls[i].y)
         #Upool_indexs = np.random.choice(len(U), replenishCount, replace=False)
         #Upool_addition = [U[i] for i in Upool_indexs]
         #Upool = np.append(Upool, Upool_addition, axis=0)
         Upool_indexs = np.random.choice(len(U), pool_size, replace=False)
#......... part of the code omitted here .........
Developer: adonelick, Project: RestaurantRevenue, Lines: 103, Source: semi_supervised_knn.py
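
Example 14 feeds get_covariance() into KNeighborsRegressor as the V matrix of the Mahalanobis metric, so neighbor distances account for feature scales and correlations. A hedged, minimal reconstruction of just that piece (in current scikit-learn the matrix is passed through metric_params rather than as a keyword):

# Sketch: Mahalanobis kNN regression with the PCA model covariance as V.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsRegressor

X = load_iris().data
y = X[:, 0]                               # toy regression target
V = PCA().fit(X).get_covariance()
knn = KNeighborsRegressor(n_neighbors=3, metric='mahalanobis',
                          metric_params={'V': V})
knn.fit(X, y)
print(knn.predict(X[:5]))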


Note: The sklearn.decomposition.PCA.get_covariance examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright in the code remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.