This article collects typical usage examples of Python's sklearn.decomposition.pca.PCA.fit_transform method. If you are wondering exactly what PCA.fit_transform does, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples of the containing class, sklearn.decomposition.pca.PCA.
The sections below present 6 code examples of the PCA.fit_transform method, sorted by popularity by default.
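For orientation, here is a minimal, self-contained sketch of the method itself (shown with the public import path sklearn.decomposition.PCA; the sklearn.decomposition.pca module used in the examples below is the older, now-removed internal location of the same class):

import numpy as np
from sklearn.decomposition import PCA

# toy data: 10 samples, 3 features
rng = np.random.RandomState(0)
X = rng.rand(10, 3)

pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)  # fit the model to X, then return the projection
print(X_reduced.shape)            # (10, 2)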
Example 1: classify_for_benchmark
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
import traceback

import numpy as np
import sklearn.naive_bayes as nb
from sklearn.cross_validation import StratifiedKFold
from sklearn.decomposition.pca import PCA
from sklearn.preprocessing import Imputer


def classify_for_benchmark(data_set_df, user_info_df, features, label='gender', classifier=None, num=None):
    instance_num = len(data_set_df.columns)
    x = data_set_df.loc[features]
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)
    imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=1)
    x_replaced = x.replace([np.inf, -np.inf], np.nan)
    x_imp = imp.fit_transform(x_replaced)  # the imputer must be fitted before it can transform
    y = user_info_df.get(label)
    y_filtered = y[list(map(int, x.columns.values))]
    clf = nb.BernoulliNB() if classifier is None else classifier
    cv_num = min(len(y_filtered), 10)
    if cv_num <= 1 or len(y_filtered.unique()) <= 1:
        return 0.0, 100.0
    else:
        final_score = 0.0
        for i in range(100):
            score = 0.0
            cnt = 0
            skf = StratifiedKFold(y_filtered, n_folds=cv_num, shuffle=True)
            for tr_index, te_index in skf:
                x_train, x_test = x_imp.T[tr_index], x_imp.T[te_index]
                y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]
                # cap the number of principal components at the smallest data dimension
                min_num = min(len(x_train), len(x_train.T), len(x_test), len(x_test.T), num)
                pca = PCA(min_num)
                x_train = pca.fit_transform(x_train)
                # note: refitting on the test fold projects it onto a different basis;
                # pca.transform(x_test) would keep both folds in the same coordinate system
                x_test = pca.fit_transform(x_test)
                try:
                    clf.fit(x_train, y_train)
                    score += clf.score(x_test, y_test)
                    cnt += 1
                    # cv_score = cross_validation.cross_val_score(clf, x_imp.T, y_filtered, cv=cv_num)
                except ValueError:
                    traceback.print_exc()
                    print(i, "why error? skip!")
            if cnt > 0:
                score /= cnt
                print(i, score)
            else:
                return 0.0, (float(instance_num - len(y_filtered)) / instance_num)
            final_score += score
        final_score /= 100
        miss_clf_rate = (float(instance_num - len(y_filtered)) / instance_num)
        return final_score, miss_clf_rate
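A caveat on this example: calling fit_transform on the test fold fits a separate PCA basis, so the training and test folds end up in different coordinate systems. The conventional pattern, sketched below on made-up arrays, fits on the training fold only:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
x_train = rng.rand(80, 20)  # hypothetical training fold
x_test = rng.rand(20, 20)   # hypothetical test fold

pca = PCA(n_components=5)
x_train_reduced = pca.fit_transform(x_train)  # learn the basis on the training fold
x_test_reduced = pca.transform(x_test)        # project the test fold onto the same basis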
Example 2: LogisticClassifier
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
from sklearn.decomposition.pca import PCA
from sklearn.preprocessing import StandardScaler
# LogisticRegression and sgd_optimization here are project-local helpers,
# not the scikit-learn estimators of the same name


class LogisticClassifier(object):
    def __init__(self, learning_rate=0.01, reg=0., momentum=0.5):
        self.classifier = LogisticRegression(learning_rate, reg, momentum)
        self.pca = None
        self.scaler = None

    def sgd_optimize(self, data, n_epochs, mini_batch_size):
        data = self._preprocess_data(data)
        sgd_optimization(data, self.classifier, n_epochs, mini_batch_size)

    def _preprocess_data(self, data):
        # center the data and scale it to unit standard deviation
        if self.scaler is None:
            self.scaler = StandardScaler()
            data = self.scaler.fit_transform(data)
        else:
            data = self.scaler.transform(data)
        if self.pca is None:
            # use Minka's MLE to guess an appropriate dimensionality
            self.pca = PCA(n_components='mle')
            data = self.pca.fit_transform(data)
        else:
            data = self.pca.transform(data)
        return data
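The n_components='mle' option above asks PCA to choose the dimensionality itself via Minka's MLE instead of requiring a fixed number up front. A small sketch of the idea on synthetic data (the selected dimension depends on the data, so the printed value is only illustrative):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
# 200 samples living near a 3-dimensional subspace of a 10-dimensional space
X = rng.randn(200, 3) @ rng.randn(3, 10) + 0.01 * rng.randn(200, 10)

pca = PCA(n_components='mle')
X_reduced = pca.fit_transform(X)
print(pca.n_components_)  # dimensionality chosen by Minka's MLE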
Example 3: test_pipeline_transform
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
from numpy.testing import assert_array_almost_equal
from sklearn.datasets import load_iris
from sklearn.decomposition.pca import PCA
from sklearn.pipeline import Pipeline


def test_pipeline_transform():
    # Test whether pipeline works with a transformer at the end.
    # Also test pipeline.transform and pipeline.inverse_transform
    iris = load_iris()
    X = iris.data
    pca = PCA(n_components=2)
    pipeline = Pipeline([('pca', pca)])

    # test transform and fit_transform:
    X_trans = pipeline.fit(X).transform(X)
    X_trans2 = pipeline.fit_transform(X)
    X_trans3 = pca.fit_transform(X)
    assert_array_almost_equal(X_trans, X_trans2)
    assert_array_almost_equal(X_trans, X_trans3)

    X_back = pipeline.inverse_transform(X_trans)
    X_back2 = pca.inverse_transform(X_trans)
    assert_array_almost_equal(X_back, X_back2)
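As a usage note, inverse_transform maps the reduced data back to the original feature space; with n_components=2 the result is only an approximation of X, since the discarded components are gone. A quick sketch of that round trip:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data
pca = PCA(n_components=2)
X_trans = pca.fit_transform(X)
X_back = pca.inverse_transform(X_trans)  # approximate reconstruction
print(np.abs(X - X_back).max())          # small but nonzero residual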
Example 4: PCA
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition.pca import PCA

X_test = pd.read_csv('Data/test.csv', header=None).as_matrix()
y = pd.read_csv('Data/trainLabels.csv', header=None)[0].as_matrix()
X = pd.read_csv('Data/train.csv', header=None).as_matrix()

# 2-component whitened PCA fitted on train and test together, for visualization
pca2 = PCA(n_components=2, whiten=True)
pca2.fit(np.r_[X, X_test])
X_pca = pca2.transform(X)
i0 = np.argwhere(y == 0)[:, 0]
i1 = np.argwhere(y == 1)[:, 0]
X0 = X_pca[i0, :]
X1 = X_pca[i1, :]
plt.plot(X0[:, 0], X0[:, 1], 'ro')
plt.plot(X1[:, 0], X1[:, 1], 'b*')

# full whitened PCA on the combined data
pca = PCA(whiten=True)
X_all = pca.fit_transform(np.r_[X, X_test])
print(pca.explained_variance_ratio_)


def kde_plot(x):
    from scipy.stats.kde import gaussian_kde
    kde = gaussian_kde(x)
    positions = np.linspace(x.min(), x.max())
    smoothed = kde(positions)
    plt.plot(positions, smoothed)


def qq_plot(x):
    from scipy.stats import probplot
    probplot(x, dist='norm', plot=plt)


kde_plot(X_all[:, 0])
kde_plot(X_all[:, 2])
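The whiten=True option used above rescales each principal component to unit variance, removing the relative scaling between components (useful for downstream models that assume roughly isotropic inputs). A minimal check of that property on synthetic data:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(500, 5) * np.array([10.0, 5.0, 2.0, 1.0, 0.5])  # very different feature scales

X_white = PCA(whiten=True).fit_transform(X)
print(X_white.std(axis=0))  # each component now has (approximately) unit variance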
Example 5: xrange
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
from sklearn.decomposition.pca import PCA

# eng = matlab.engine.start_matlab()
X = []
for i in range(1, 30):
    mylist = []
    with open('J_Left/' + str(i)) as f:
        for line in f:
            line = line[:-1]  # strip the trailing newline
            temp = line.split(' ')
            for j in range(len(temp) - 1):  # renamed from `i` to avoid shadowing the outer loop variable
                # print(temp[j])
                mylist.append(float(temp[j]))
    mylist = mylist + [0] * (5000 * 9 - len(mylist))  # zero-pad every sample to a fixed length
    X.append(mylist)
print('len of X', len(X))

pca = PCA(n_components=4)
t = pca.fit_transform(X)
l = []
for v in t:
    arr = [float(e) for e in v]
    l.append(arr)
# ret = eng.moh_pca(l)
print(l)
print(type(l))
print('len of t', len(t))
Example 6: range
# Required import: from sklearn.decomposition.pca import PCA [as alias]
# Or: from sklearn.decomposition.pca.PCA import fit_transform [as alias]
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition.pca import PCA

# `train` holds the raw training matrix loaded earlier in the original script

# plt.show()
plt.draw()
# plt.savefig("some_digits.png")

### Preprocessing data ###
# Scale features to the range (0, 1).
# Note: this scaling is often used as an alternative to zero-mean,
# unit-variance standardization (performed with sklearn.preprocessing.scale)
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
train_minmax = min_max_scaler.fit_transform(train)

# PCA
n_pc = 78  # number of principal components to keep
pca = PCA(n_components=n_pc, whiten=True)  # whitening to remove correlations
train_pca = pca.fit_transform(train_minmax)

# plot all the principal components with their relative explained variance
features = list(range(1, n_pc + 1))
plt.figure(2)
# explained_variance_ratio_ gives the fraction of variance explained by each
# selected component; the ratios over all components sum to 1.0
# plt.plot(features, pca.explained_variance_ratio_, 'g--', marker='o')
plt.semilogy(features, pca.explained_variance_ratio_, "g--", marker="o")
plt.axis([1, n_pc, 0, pca.explained_variance_ratio_.max()])
plt.grid(True)
plt.xlabel("principal components")
plt.ylabel("variance explained (log)")
plt.title("scree plot")
# plt.savefig("screeplot_" + str(n_pc) + "_PC.png")

### Train a SVM Classifier ###
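The listing cuts off at this heading. A plausible continuation, assuming the usual scikit-learn pattern of fitting sklearn.svm.SVC on the PCA-reduced features (train_pca from above, with hypothetical labels `labels`), might look like:

from sklearn.svm import SVC

# hypothetical: `labels` holds the class label for each row of train_pca
clf = SVC(kernel='rbf', C=1.0)
clf.fit(train_pca, labels)
print(clf.score(train_pca, labels))  # training accuracy (an optimistic estimate)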