本文整理匯總了Python中sklearn.decomposition.pca.PCA.fit方法的典型用法代碼示例。如果您正苦於以下問題:Python PCA.fit方法的具體用法?Python PCA.fit怎麼用?Python PCA.fit使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.decomposition.pca.PCA的用法示例。
在下文中一共展示了PCA.fit方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: pca_plot
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca_plot(fp_list, clusters):
    """Plot fingerprints on their first three principal components, per cluster.

    Every fingerprint is converted to a numpy array, a 3-component PCA is
    fitted on the whole set, and two scatter figures (PC1 vs PC2, PC2 vs PC3)
    are drawn with one colour per cluster label.

    :param fp_list: fingerprints accepted by ``DataStructs.ConvertToNumpyArray``
    :param clusters: integer cluster label of each fingerprint, in the same
        order as ``fp_list``; the label selects the colour
    :return: ``HBox`` holding the two figures
    """
    np_fps = []
    for fp in fp_list:
        # ``arr`` is handed to ConvertToNumpyArray as the output buffer.
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)

    pca = PCA(n_components=3)
    pca.fit(np_fps)
    np_fps_r = pca.transform(np_fps)

    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title="PCA clustering of PAINS")
    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title="PCA clustering of PAINS")
    color_vector = ["blue", "red", "green", "orange", "pink", "cyan", "magenta",
                    "brown", "purple"]

    unique_labels = set(clusters)
    print(len(unique_labels))
    for clust_num in unique_labels:
        print(clust_num)
        # Rows of the projection that belong to this cluster only; plotting
        # the full matrix for every label would paint all points in the
        # colour of whichever cluster happens to be drawn last.
        members = [i for i, label in enumerate(clusters) if label == clust_num]
        print(len(members))
        # Wrap around the palette so more than nine clusters do not raise.
        color = color_vector[clust_num % len(color_vector)]
        p1.scatter(np_fps_r[members, 0], np_fps_r[members, 1], color=color)
        p2.scatter(np_fps_r[members, 1], np_fps_r[members, 2], color=color)
    return HBox(p1, p2)
示例2: pca_prefit
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca_prefit(weights, xs):
    """
    Pre-processing step for computing SOM initial values.

    Applies a linear transformation so that the principal components and
    eigenvalues of the weight vectors match those of the input vectors.

    :param weights: initial weight vectors
    :param xs: input vectors
    :return: pre-processed weight vectors
    """
    n_dims = np.shape(xs)[1]

    weight_pca = PCA(n_components=n_dims)
    weight_pca.fit(weights)
    input_pca = PCA(n_components=n_dims)
    input_pca.fit(xs)

    # Replace zero variances with a tiny positive value so the division
    # below never divides by zero.
    weight_var = weight_pca.explained_variance_
    weight_var[weight_var == 0] = np.max(weight_var) * 1e-6

    # Project the centred weights onto their own principal axes and scale
    # each axis to unit variance.
    centred = weights - np.mean(weights, axis=0)
    whitened = centred.dot(weight_pca.components_.T) / np.sqrt(weight_var)

    # Re-scale to the input variances, rotate into the input principal axes
    # and shift to the input mean.
    rescaled = whitened * np.sqrt(input_pca.explained_variance_)
    return rescaled.dot(input_pca.components_) + np.mean(xs, axis=0)
示例3: main
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def main():
    """Train the sentiment artifacts and save them under ``path``.

    Reads the sentiment CSV, fits a TF-IDF vectorizer and a multinomial
    naive Bayes classifier on the tweet text, trains a word2vec model on the
    tokenized tweets, fits a 2-component PCA on the word vectors, and writes
    all four artifacts to disk. Progress is reported with ``print``.
    """
    print('Reading in data file...')
    data = pd.read_csv(
        path + 'Sentiment Analysis Dataset.csv',
        usecols=['Sentiment', 'SentimentText'],
        error_bad_lines=False,
    )

    print('Pre-processing tweet text...')
    tweets = data['SentimentText']
    vectorizer = TfidfVectorizer(
        decode_error='replace',
        strip_accents='unicode',
        stop_words='english',
        tokenizer=tokenize,
    )
    features = vectorizer.fit_transform(tweets.values)
    labels = data['Sentiment'].values

    print('Training sentiment classification model...')
    classifier = MultinomialNB()
    classifier.fit(features, labels)

    print('Training word2vec model...')
    tokenized = tweets.map(tokenize)
    word2vec = Word2Vec(tokenized.tolist(), size=100, window=4, min_count=10,
                        workers=4)
    word2vec.init_sims(replace=True)

    print('Fitting PCA transform...')
    word_vectors = [word2vec[word] for word in word2vec.vocab]
    pca = PCA(n_components=2)
    pca.fit(word_vectors)

    print('Saving artifacts to disk...')
    for artifact, filename in ((vectorizer, 'vectorizer.pkl'),
                               (classifier, 'classifier.pkl'),
                               (pca, 'pca.pkl')):
        joblib.dump(artifact, path + filename)
    word2vec.save(path + 'word2vec.pkl')

    print('Process complete.')
示例4: pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca(target, control, title, name_one, name_two):
    """Plot target and control fingerprints on three shared principal components.

    Both groups are converted to numpy arrays and projected with a single
    3-component PCA fitted on their concatenation. Target points are drawn
    in blue and control points in red on two figures (PC1 vs PC2 and
    PC2 vs PC3).

    :param target: list of fingerprints for the first group
    :param control: list of fingerprints for the second group
    :param title: title used for both figures
    :param name_one: legend entry for the target points
    :param name_two: legend entry for the control points
    :return: ``HBox`` holding the two figures
    """
    np_fps = []
    for fp in target + control:
        # ``arr`` is handed to ConvertToNumpyArray as the output buffer.
        arr = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)

    # Named ``model`` so the local does not shadow this function's own name.
    model = PCA(n_components=3)
    model.fit(np_fps)
    np_fps_r = model.transform(np_fps)

    # Targets come first in the concatenation, so a single split index
    # separates the two groups.
    n_target = len(target)
    target_r = np_fps_r[:n_target]
    control_r = np_fps_r[n_target:]

    p1 = figure(x_axis_label="PC1",
                y_axis_label="PC2",
                title=title)
    p1.scatter(target_r[:, 0], target_r[:, 1],
               color="blue", legend=name_one)
    p1.scatter(control_r[:, 0], control_r[:, 1],
               color="red", legend=name_two)

    p2 = figure(x_axis_label="PC2",
                y_axis_label="PC3",
                title=title)
    p2.scatter(target_r[:, 1], target_r[:, 2],
               color="blue", legend=name_one)
    p2.scatter(control_r[:, 1], control_r[:, 2],
               color="red", legend=name_two)
    return HBox(p1, p2)
示例5: pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca(tx, ty, rx, ry):
    """Halve the feature count with PCA and run the downstream experiments.

    PCA is fitted on ``tx`` only; ``rx`` is projected with that same fit.
    The reduced sets are then passed to the ``em``, ``km`` and ``nn``
    helpers.

    :param tx: first sample matrix; PCA is fitted on it
    :param ty: labels for ``tx``
    :param rx: second sample matrix, transformed with the PCA fitted on ``tx``
    :param ry: labels for ``rx``
    """
    # Floor division keeps the component count an integer on Python 3,
    # where ``/`` would produce a float.
    compressor = PCA(n_components=tx[1].size // 2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    km(newtx, ty, newrx, ry, add="wPCAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wPCAr")
示例6: pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca(tx, ty, rx, ry):
    """Halve the feature count with PCA and run the downstream experiments.

    PCA is fitted on ``tx`` only; ``rx`` is projected with that same fit.
    The reduced sets are then passed to the ``em``, ``km`` and ``nn``
    helpers. Start and end markers are printed.

    :param tx: first sample matrix; PCA is fitted on it
    :param ty: labels for ``tx``
    :param rx: second sample matrix, transformed with the PCA fitted on ``tx``
    :param ry: labels for ``rx``
    """
    print("pca")
    # Floor division keeps the component count an integer on Python 3,
    # where ``/`` would produce a float.
    compressor = PCA(n_components=tx[1].size // 2)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wPCAtr")
    km(newtx, ty, newrx, ry, add="wPCAtr")
    nn(newtx, ty, newrx, ry, add="wPCAtr")
    print("pca done")
示例7: PCA佮SVM模型
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def PCA佮SVM模型(self, 問題, 答案):
    """Fit a PCA + SVM pipeline and return a prediction function.

    A 100-component PCA is fitted on the questions, then an ``SVC`` with
    ``C=1`` is trained on the reduced questions using uniform sample
    weights. Progress messages are printed along the way.

    :param 問題: training samples (questions)
    :param 答案: training targets (answers)
    :return: callable that reduces new samples with the fitted PCA and
        returns the SVM predictions for them
    """
    reducer = PCA(n_components=100)
    classifier = svm.SVC(C=1)
    # Every sample gets weight 1.
    uniform_weights = np.ones(len(問題))

    print('訓練PCA')
    reducer.fit(問題)

    print('訓練SVM')
    reduced = reducer.transform(問題)
    classifier.fit(reduced, 答案, sample_weight=uniform_weights)

    print('訓練了')
    return lambda 問: classifier.predict(reducer.transform(問))
示例8: train_pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def train_pca(pains_fps, num_components=3):
    """Reduce fingerprint bit vectors to their principal components.

    :param pains_fps: iterable of fingerprints accepted by
        ``DataStructs.ConvertToNumpyArray``
    :param num_components: number of principal components to keep
    :return: the fingerprints projected onto the fitted components
    """
    def _as_array(fingerprint):
        # The array is handed to ConvertToNumpyArray as the output buffer.
        buf = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fingerprint, buf)
        return buf

    vectors = [_as_array(fingerprint) for fingerprint in pains_fps]
    reducer = PCA(n_components=num_components)
    reducer.fit(vectors)
    return reducer.transform(vectors)
示例9: calc_pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def calc_pca(bnd, npc=None, preaverage=False, use_unbiased=False,
             method='mdp'):
    '''
    Run PCA on binned data with either the MDP or the scikit-learn backend.

    Parameters
    ----------
    bnd : BinnedData
        binned data
    npc : int or None, optional
        number of PCs to calculate, defaults to None
    preaverage : bool
        average across repeats?
    use_unbiased : bool
        forwarded unchanged to ``format_for_fa``
    method : {'mdp', 'skl'}
        backend to use: ``mdp.nodes.PCANode`` or scikit-learn ``PCA``

    Returns
    -------
    score : ndarray
        (npc, nobs)
    weight : ndarray
        (npc, nvar)
    '''
    assert method in ['mdp', 'skl']
    data = format_for_fa(bnd, preaverage=preaverage,
                         use_unbiased=use_unbiased)

    if method == 'skl':
        model = PCA(n_components=npc)
        model.fit(data)
        score = model.transform(data)
        weight = model.components_.T
    elif method == 'mdp':
        node = mdp.nodes.PCANode(output_dim=npc)
        score = node.execute(data)
        weight = node.get_projmatrix()

    # Both results are transposed on the way out.
    return score.T, weight.T
示例10: reduction
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def reduction(data, params):
    """Reduce ``data`` with PCA, taking the component count from ``params``.

    :param data: samples to fit the PCA on and to transform
    :param params: mapping of parameter names to values; only the
        ``'n_components'`` entry is used
    :return: ``data`` projected onto the fitted principal components
    :raises KeyError: if ``params`` has no ``'n_components'`` entry
    """
    # Read the setting directly. Building assignments with exec() does not
    # create function locals on Python 3 and would execute arbitrary text
    # taken from ``params``.
    n_components = params['n_components']

    # apply PCA
    pca = PCA(n_components=n_components)
    pca.fit(data)
    return pca.transform(data)
示例11: airline_pca
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def airline_pca():
    """Draw a 3-D scatter of the airline data on its first three principal components.

    Reads the module-level ``pca_data``, ``major_carriers`` and
    ``target_carrier``. Points are coloured by the index of their carrier in
    ``major_carriers``. The figure is created but neither shown nor returned.
    """
    # Normalize once and use the same matrix for fitting and projecting, so
    # the centring and components match the data that is plotted.
    X = normalize(np.array(pca_data))
    pca = PCA(n_components=3)
    pca.fit(X)
    Y = pca.transform(X)

    fig = plt.figure(1, figsize=(8, 6))
    ax = Axes3D(fig, elev=-150, azim=110)

    # Map each carrier name to an integer used as its colour value.
    colordict = {carrier: i for i, carrier in enumerate(major_carriers)}
    pointcolors = [colordict[carrier] for carrier in target_carrier]
    ax.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c=pointcolors)

    ax.set_title("First three PCA directions")
    ax.set_xlabel("1st eigenvector")
    ax.w_xaxis.set_ticklabels([])
    ax.set_ylabel("2nd eigenvector")
    ax.w_yaxis.set_ticklabels([])
    ax.set_zlabel("3rd eigenvector")
    ax.w_zaxis.set_ticklabels([])
示例12: pca_no_labels
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def pca_no_labels(target, title="PCA clustering of PAINS", color="blue"):
    """Plot unlabelled fingerprints on their first three principal components.

    :param target: iterable of fingerprints accepted by
        ``DataStructs.ConvertToNumpyArray``
    :param title: title used for both figures
    :param color: colour used for every point
    :return: ``HBox`` holding the PC1-vs-PC2 and PC2-vs-PC3 figures
    """
    vectors = []
    for fingerprint in target:
        # The array is handed to ConvertToNumpyArray as the output buffer.
        buf = numpy.zeros((1,))
        DataStructs.ConvertToNumpyArray(fingerprint, buf)
        vectors.append(buf)

    reducer = PCA(n_components=3)
    reducer.fit(vectors)
    projected = reducer.transform(vectors)

    # One panel per consecutive pair of components.
    axis_pairs = (
        ((0, "PC1"), (1, "PC2")),
        ((1, "PC2"), (2, "PC3")),
    )
    panels = []
    for (x_col, x_name), (y_col, y_name) in axis_pairs:
        panel = figure(x_axis_label=x_name,
                       y_axis_label=y_name,
                       title=title)
        panel.scatter(projected[:, x_col], projected[:, y_col], color=color)
        panels.append(panel)
    return HBox(*panels)
示例13: dimensional
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def dimensional(tx, ty, rx, ry, add=None):
    """Sweep the output dimensionality of four reducers and print diagnostics.

    For every size from 1 up to the number of features, fits PCA, ICA,
    random projection and a K-best selector on ``tx`` and prints the fit
    time and shapes. PCA also prints its score, and PCA and ICA print the
    norm of the reconstruction error.

    :param tx: sample matrix the reducers are fitted on
    :param ty: labels for ``tx``
    :param rx: unused
    :param ry: unused
    :param add: first positional argument passed to ``best``
    """
    # Candidate output sizes: 1 .. number of features.
    sizes = range(1, tx[1].size + 1)

    print("pca")
    for i in sizes:
        print("===" + str(i))
        compressor = PCA(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        V = compressor.components_
        # Explicit formatting prints the same space-separated line on
        # Python 2 and Python 3.
        print("%s %s %s" % (runtime, V.shape, compressor.score(tx)))
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("pca done")

    print("ica")
    for i in sizes:
        print("===" + str(i))
        # Pass the swept size, as the other three sweeps do; without it
        # every iteration repeats the identical fit.
        # NOTE(review): assumes ICA accepts ``n_components`` — confirm
        # against the alias's import.
        compressor = ICA(n_components=i, whiten=True)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        print("%s %s" % (newtx.shape, runtime))
        distances = np.linalg.norm(tx - compressor.inverse_transform(newtx))
        print(distances)
    print("ica done")

    print("RP")
    for i in sizes:
        print("===" + str(i))
        compressor = RandomProjection(n_components=i)
        t0 = time()
        compressor.fit(tx, y=ty)
        newtx = compressor.transform(tx)
        runtime = time() - t0
        shape = newtx.shape
        print("%s %s" % (runtime, shape))
    print("RP done")

    print("K-best")
    for i in sizes:
        print("===" + str(i))
        compressor = best(add, k=i)
        t0 = time()
        compressor.fit(tx, y=ty.ravel())
        newtx = compressor.transform(tx)
        runtime = time() - t0
        shape = newtx.shape
        print("%s %s" % (runtime, shape))
    print("K-best done")
示例14: do_train_with_freq
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def do_train_with_freq():
    """Train a network on PCA-reduced, scaled "mix" + "freq" features.

    Loads both feature sets, joins them column-wise, holds out 10% for
    testing, standard-scales through ``SKSupervisedLearning``, reduces to 250
    principal components and passes the resulting data sets to ``train``.
    Returns nothing.
    """
    mix_files = TrainFiles(train_path = train_path_mix, labels_file = labels_file, test_size = 0.)
    freq_files = TrainFiles(train_path = train_path_freq, labels_file = labels_file, test_size = 0.)

    mix_features, _, _, _ = mix_files.prepare_inputs()
    freq_features, freq_labels, _, _ = freq_files.prepare_inputs()

    # Join the two feature sets column-wise; labels come from the "freq" set.
    features = np.c_[mix_features, freq_features]
    X, Xt, Y, Yt = train_test_split(features, freq_labels, test_size = 0.1)

    sl = SKSupervisedLearning(SVC, X, Y, Xt, Yt)
    sl.fit_standard_scaler()

    # NOTE(review): the PCA is fitted on the train and test rows together.
    pca = PCA(250)
    pca.fit(np.r_[sl.X_train_scaled, sl.X_test_scaled])
    X_pca = pca.transform(sl.X_train_scaled)
    X_pca_test = pca.transform(sl.X_test_scaled)

    # Disabled experiments, kept for reference:
    #sl.train_params = {'C': 100, 'gamma': 0.0001, 'probability' : True}
    #print "Start SVM: ", time_now_str()
    #sl_ll_trn, sl_ll_tst = sl.fit_and_validate()
    #print "Finish Svm: ", time_now_str()
    ##construct a dataset for RBM
    #X_rbm = X[:, 257:]
    #Xt_rbm = X[:, 257:]
    #rng = np.random.RandomState(123)
    #rbm = RBM(X_rbm, n_visible=X_rbm.shape[1], n_hidden=X_rbm.shape[1]/4, numpy_rng=rng)
    #pretrain_lr = 0.1
    #k = 2
    #pretraining_epochs = 200
    #for epoch in xrange(pretraining_epochs):
    #    rbm.contrastive_divergence(lr=pretrain_lr, k=k)
    #    cost = rbm.get_reconstruction_cross_entropy()
    #    print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost

    trndata, tstdata = createDataSets(X_pca, Y, X_pca_test, Yt)
    train(trndata, tstdata, epochs = 1000, test_error = 0.025, momentum = 0.2, weight_decay = 0.0001)
示例15: showDataTable
# 需要導入模塊: from sklearn.decomposition.pca import PCA [as 別名]
# 或者: from sklearn.decomposition.pca.PCA import fit [as 別名]
def showDataTable():
    """Render the descriptive-statistics page for the selected columns.

    Computes summary statistics, the explained-variance ratios of a
    5-component PCA, the correlation matrix, and that matrix's eigenvalues
    and eigenvectors ordered by descending eigenvalue. Everything is passed
    to the ``showDataTable.html`` template. The sort orders are also
    printed.

    :return: the rendered template
    """
    df = frame[cols]
    data_dsc = df.describe().transpose()

    model = PCA(n_components=5)
    model.fit(df)
    variance_ratio = model.explained_variance_ratio_

    data_corr = df.corr()
    eigenValues, eigenVectors = LA.eig(data_corr)

    # Indices that sort the eigenvalues from largest to smallest.
    descending = eigenValues.argsort()[::-1]
    print(descending)
    print(descending[::-1])

    context = {
        "title": "Descriptive statistics",
        "data": df,
        "data_dsc": data_dsc,
        "pca": pd.DataFrame(variance_ratio).transpose(),
        "data_corr": data_corr,
        "w": pd.DataFrame(eigenValues[descending]).transpose(),
        "v": pd.DataFrame(eigenVectors[:, descending]),
    }
    return render_template("showDataTable.html", **context)