當前位置: 首頁>>代碼示例>>Python>>正文


Python PCA.fit_transform方法代碼示例

本文整理匯總了Python中sklearn.decomposition.pca.PCA.fit_transform方法的典型用法代碼示例。如果您正苦於以下問題:Python PCA.fit_transform方法的具體用法?Python PCA.fit_transform怎麼用?Python PCA.fit_transform使用的例子?那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.decomposition.pca.PCA的用法示例。


在下文中一共展示了PCA.fit_transform方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: classify_for_benchmark

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
def classify_for_benchmark(data_set_df, user_info_df, features, label='gender', classifier=None, num=None):
    """Estimate how well ``label`` can be predicted from the selected features.

    The rows named by ``features`` are taken from ``data_set_df``, all-NaN rows
    and columns are dropped, infinities are treated as missing and the gaps are
    imputed.  Each remaining column is then used as one sample (the transposed
    matrix is what gets indexed by the fold indices).  The score is the mean
    accuracy over 100 repetitions of shuffled stratified k-fold
    cross-validation, with a PCA projection fitted on every training fold.

    Args:
        data_set_df: DataFrame indexed by feature, one column per instance.
            Column labels must be convertible to ``int`` because they are used
            to look up the targets.
        user_info_df: Object whose ``.get(label)`` returns the target series,
            indexable by those integer column labels.
        features: Row labels of ``data_set_df`` to use as features.
        label: Name of the target column in ``user_info_df``.
        classifier: Estimator with ``fit``/``score``; defaults to
            ``nb.BernoulliNB()``.
        num: Upper bound on the number of principal components.  ``None``
            leaves the choice to PCA's default (``n_components=None``).

    Returns:
        A ``(score, miss_rate)`` tuple.  ``miss_rate`` is the fraction of the
        original columns that did not survive filtering.  When fewer than two
        samples or fewer than two distinct classes remain, ``(0.0, 100.0)`` is
        returned.  If every fold of a repetition fails, ``(0.0, miss_rate)``
        is returned immediately.
    """
    instance_num = len(data_set_df.columns)
    x = data_set_df.loc[features]
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)

    # NOTE(review): the imputer is used without an explicit fit(); this relies
    # on the legacy ``Imputer`` computing its statistics at transform time when
    # axis=1 -- confirm against the installed scikit-learn version.
    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    x_replaced = x.replace([np.inf, -np.inf], np.nan)
    x_imp = imp.transform(x_replaced)

    y = user_info_df.get(label)
    # A real list (not a lazy ``map`` object) so it is a valid indexer on both
    # Python 2 and Python 3.
    y_filtered = y[[int(column) for column in x.columns.values]]

    clf = nb.BernoulliNB() if classifier is None else classifier
    cv_num = min(len(y_filtered), 10)
    if cv_num <= 1 or len(y_filtered.unique()) <= 1:
        return 0.0, 100.0

    # Only computed after the guard above, so instance_num cannot be zero here.
    miss_clf_rate = float(instance_num - len(y_filtered)) / instance_num
    samples = x_imp.T  # one row per instance
    repetitions = 100

    final_score = 0.0
    for i in range(repetitions):
        score = 0.0
        cnt = 0
        skf = StratifiedKFold(y_filtered, n_folds=cv_num, shuffle=True)
        for tr_index, te_index in skf:
            x_train, x_test = samples[tr_index], samples[te_index]
            y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]

            # Make the "no limit" case explicit instead of relying on
            # Python 2 ordering ``None`` below every integer inside min().
            if num is None:
                n_components = None
            else:
                n_components = min(len(x_train), len(x_train.T),
                                   len(x_test), len(x_test.T), num)
            pca = PCA(n_components)
            x_train = pca.fit_transform(x_train)
            # Project the held-out fold with the basis learned on the training
            # fold.  Refitting on the test fold would put train and test in
            # different coordinate systems and make the score meaningless.
            x_test = pca.transform(x_test)

            try:
                clf.fit(x_train, y_train)
                score += clf.score(x_test, y_test)
                cnt += 1
            except ValueError:
                traceback.print_exc()
                print("%d why error? skip!" % i)

        if cnt == 0:
            # No fold could be evaluated in this repetition; give up.
            return 0.0, miss_clf_rate
        score /= cnt
        print("%s %s" % (i, score))
        final_score += score

    final_score /= repetitions
    return final_score, miss_clf_rate
開發者ID:heevery,項目名稱:ohp,代碼行數:50,代碼來源:classifier.py

示例2: LogisticClassifier

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
class LogisticClassifier(object):
    """Logistic-regression classifier with a standardise-then-PCA front end.

    The scaler and the PCA model are created and fitted on the first batch of
    data passed through ``_preprocess_data``; every later batch is transformed
    with those already-fitted objects.
    """

    def __init__(self, learning_rate=0.01, reg=0., momentum=0.5):
        """Build the underlying model; preprocessing objects are created lazily."""
        self.classifier = LogisticRegression(learning_rate, reg, momentum)
        self.pca = None
        self.scaler = None

    def sgd_optimize(self, data, n_epochs, mini_batch_size):
        """Preprocess ``data`` and hand it to ``sgd_optimization``."""
        prepared = self._preprocess_data(data)
        sgd_optimization(prepared, self.classifier, n_epochs, mini_batch_size)

    def _preprocess_data(self, data):
        """Return ``data`` after scaling and PCA projection.

        Fits the scaler / PCA on first use and reuses them afterwards.
        """
        # Step 1: centre the data and scale it to unit standard deviation.
        if self.scaler is not None:
            scaled = self.scaler.transform(data)
        else:
            self.scaler = StandardScaler()
            scaled = self.scaler.fit_transform(data)

        # Step 2: project onto principal components.
        if self.pca is not None:
            return self.pca.transform(scaled)

        # Let Minka's MLE pick the number of components.
        self.pca = PCA(n_components='mle')
        return self.pca.fit_transform(scaled)
開發者ID:joshloyal,項目名稱:statlearn,代碼行數:28,代碼來源:logreg.py

示例3: test_pipeline_transform

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
def test_pipeline_transform():
    """A pipeline ending in a transformer must behave like that transformer.

    Covers ``Pipeline.transform``, ``Pipeline.fit_transform`` and
    ``Pipeline.inverse_transform`` using a single PCA step on the iris data.
    """
    data = load_iris().data
    reducer = PCA(n_components=2)
    pipe = Pipeline([('pca', reducer)])

    # Forward direction: all three routes must give the same projection.
    projected = pipe.fit(data).transform(data)
    projected_by_pipeline = pipe.fit_transform(data)
    projected_by_estimator = reducer.fit_transform(data)
    assert_array_almost_equal(projected, projected_by_pipeline)
    assert_array_almost_equal(projected, projected_by_estimator)

    # Backward direction: pipeline and bare estimator must agree as well.
    restored_by_pipeline = pipe.inverse_transform(projected)
    restored_by_estimator = reducer.inverse_transform(projected)
    assert_array_almost_equal(restored_by_pipeline, restored_by_estimator)
開發者ID:PepGardiola,項目名稱:scikit-learn,代碼行數:20,代碼來源:test_pipeline.py

示例4: PCA

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
# Load the test features, the training labels (first column) and the training
# features as NumPy arrays.  ``.values`` is used instead of ``as_matrix()``:
# it returns the same array and still exists in pandas >= 1.0, where
# ``as_matrix()`` was removed.
X_test = pd.read_csv('Data/test.csv', header=None).values
y = pd.read_csv('Data/trainLabels.csv', header=None)[0].values
X = pd.read_csv('Data/train.csv', header=None).values

# Train and test rows stacked vertically; both PCA fits below use this matrix.
X_stacked = np.r_[X, X_test]

# 2-component whitened PCA, used only to scatter-plot the training rows.
pca2 = PCA(n_components=2, whiten=True)
pca2.fit(X_stacked)
X_pca = pca2.transform(X)
# Row indices of the training samples labelled 0 and 1 respectively.
i0 = np.argwhere(y == 0)[:, 0]
i1 = np.argwhere(y == 1)[:, 0]
X0 = X_pca[i0, :]
X1 = X_pca[i1, :]
plt.plot(X0[:, 0], X0[:, 1], 'ro')  # label 0: red circles
plt.plot(X1[:, 0], X1[:, 1], 'b*')  # label 1: blue stars

# Whitened PCA with the default number of components on the stacked data.
pca = PCA(whiten=True)
X_all = pca.fit_transform(X_stacked)
print(pca.explained_variance_ratio_)

def kde_plot(x):
    """Plot a Gaussian kernel density estimate of ``x`` with pyplot.

    Args:
        x: Array of sample values; must provide ``.min()`` and ``.max()``.
    """
    # Import from the public namespace: ``scipy.stats.kde`` is a private
    # module whose direct use is deprecated.
    from scipy.stats import gaussian_kde

    kde = gaussian_kde(x)
    # Evaluate the density on an evenly spaced grid spanning the data range
    # (``np.linspace`` default number of points).
    positions = np.linspace(x.min(), x.max())
    smoothed = kde(positions)
    plt.plot(positions, smoothed)

def qq_plot(x):
    """Draw a probability plot of ``x`` against the normal distribution.

    The plot is rendered through the module-level ``plt`` object.
    """
    from scipy import stats

    stats.probplot(x, plot=plt, dist='norm')
    
# Density plots for columns 0 and 2 of the PCA-transformed data.
for component in (0, 2):
    kde_plot(X_all[:, component])
開發者ID:ivanliu1989,項目名稱:Data-Science-London,代碼行數:33,代碼來源:dsl.py

示例5: xrange

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
# eng = matlab.engine.start_matlab()

# Build one fixed-length feature row per input file 'J_Left/1' .. 'J_Left/29'.
X = []
for file_index in range(1, 30):
    mylist = []
    # ``with`` guarantees the handle is closed; the original leaked one open
    # file per iteration.
    with open('J_Left/' + str(file_index)) as handle:
        for line in handle:
            # Strip only the newline, so a final line without one keeps its
            # last character (``line[:-1]`` would have eaten it).
            tokens = line.rstrip('\n').split(' ')
            # The last token of every line is skipped, as before --
            # presumably the empty string left by a trailing space; verify
            # against the data files.
            mylist.extend(float(token) for token in tokens[:-1])
    # Zero-pad every row to 5000 * 9 values so all rows have equal length.
    # NOTE(review): rows longer than that are not truncated.
    mylist = mylist + [0] * (5000 * 9 - len(mylist))
    X.append(mylist)
print('len of X %d' % len(X))

pca = PCA(n_components=4)
t = pca.fit_transform(X)

# Convert the projected array into plain nested lists of Python floats.
l = [[float(value) for value in row] for row in t]
# ret = eng.moh_pca(l)

print(l)
print(type(l))
print('len of t %d' % len(t))

開發者ID:S-Mohammad-Hashemi,項目名稱:GestFi,代碼行數:31,代碼來源:ML_learning.py

示例6: range

# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit_transform [as 別名]
# plt.show()
plt.draw()
# plt.savefig("some_digits.png")

### Preprocessing data ###

# Rescale every feature into the range (0, 1).
# Note: min-max scaling is a common alternative to zero-mean, unit-variance
# standardisation (sklearn.preprocessing.scale).
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
train_minmax = min_max_scaler.fit_transform(train)

# PCA
n_pc = 78  # number of principal components to keep
pca = PCA(n_components=n_pc, whiten=True)  # whitened output
train_pca = pca.fit_transform(train_minmax)

# Scree plot: explained-variance ratio of each kept component, log-scaled y.
features = list(range(1, n_pc + 1))  # component numbers 1..n_pc for the x axis
plt.figure(2)
variance_ratio = pca.explained_variance_ratio_
# plt.plot(features, pca.explained_variance_ratio_, 'g--', marker='o')
plt.semilogy(features, variance_ratio, "g--", marker="o")
plt.axis([1, n_pc, 0, variance_ratio.max()])
plt.grid(True)
plt.xlabel("principal components")
plt.ylabel("variance explained (log)")
plt.title("scree plot")
# plt.savefig("screeplot_" + str(n_pc) + "_PC.png")

### Train a SVM Classifier ###
開發者ID:jackaljack,項目名稱:digit-recognizer-kaggle,代碼行數:33,代碼來源:digit-recognizer.py


注:本文中的sklearn.decomposition.pca.PCA.fit_transform方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。