This article collects typical usage examples of the sklearn.decomposition.PCA class in Python. If you have been wondering how the Python PCA class is used in practice, how to call it, or what real-world PCA code looks like, the curated class examples below may help.
The following shows 15 code examples of the PCA class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
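Before the collected examples, here is a minimal sketch of the typical fit/transform workflow. The data below is random and purely illustrative; only the PCA calls themselves matter:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(100, 10)    # 100 samples, 10 features (illustrative data)
pca = PCA(n_components=2)                     # keep the two leading principal components
X_reduced = pca.fit_transform(X)              # fit on X, then project X onto those components
print(X_reduced.shape)                        # (100, 2)
print(pca.explained_variance_ratio_)          # fraction of variance captured by each component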
Example 1: pcafunction

def pcafunction(dataList, countList, nameList):
    from sklearn.decomposition import PCA
    import numpy as np
    import matplotlib.pyplot as plt

    pcadataArray = np.array(dataList)
    pcaCountArray = np.array(countList)
    pca = PCA(n_components=2)
    X = pca.fit(pcadataArray).transform(pcadataArray)

    # Collect the unique class names in order of first appearance, for the legend.
    pcaNameList = []
    for name in nameList:
        if name not in pcaNameList:
            pcaNameList.append(name)

    print('explained variance ratio (first two components): %s'
          % str(pca.explained_variance_ratio_))

    # Plot the first two principal components, one marker style per class label.
    plt.plot(X[pcaCountArray == 0, 0], X[pcaCountArray == 0, 1], 'or',
             X[pcaCountArray == 1, 0], X[pcaCountArray == 1, 1], '^b',
             X[pcaCountArray == 2, 0], X[pcaCountArray == 2, 1], 'sg')
    plt.xlabel('PC1 (explained variance ratio: ' + str(pca.explained_variance_ratio_[0]) + ')', fontsize=14)
    plt.ylabel('PC2 (explained variance ratio: ' + str(pca.explained_variance_ratio_[1]) + ')', fontsize=14)
    plt.legend([str(name) for name in pcaNameList], loc='best', fontsize=14)
    plt.title('PCA', fontsize=16)
Example 2: add_tsne_features

def add_tsne_features(x_train, x_test):
    print('add_tsne_features <<')

    x_train_data = x_train.data_
    x_test_data = x_test.data_
    x = np.vstack((x_train_data, x_test_data))

    print('applying pca...')
    pca = PCA(n_components=25)
    x_pca = pca.fit_transform(x)

    print('applying t-SNE...')
    tsne_model = TSNE(n_components=2, random_state=0)
    x_tsne = tsne_model.fit_transform(x_pca)

    # Append the two t-SNE coordinates as extra feature columns.
    x_train_data = np.hstack((x_train_data, x_tsne[:x_train_data.shape[0], :]))
    x_test_data = np.hstack((x_test_data, x_tsne[-x_test_data.shape[0]:, :]))

    assert(x_train.columns_ == x_test.columns_)
    columns = x_train.columns_ + ['tsne_1', 'tsne_2']
    x_train = DataSet(x_train.ids_, columns, x_train_data)
    x_test = DataSet(x_test.ids_, columns, x_test_data)

    print('add_tsne_features >>')
    return x_train, x_test
Example 3: test_feature_union_weights

def test_feature_union_weights():
    # test feature union with transformer weights
    iris = load_iris()
    X = iris.data
    y = iris.target
    pca = PCA(n_components=2, svd_solver='randomized', random_state=0)
    select = SelectKBest(k=1)
    # test using fit followed by transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # test using fit_transform
    fs = FeatureUnion([("pca", pca), ("select", select)],
                      transformer_weights={"pca": 10})
    X_fit_transformed = fs.fit_transform(X, y)
    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("pca", pca), ("select", select)],
                      transformer_weights={"mock": 10})
    X_fit_transformed_wo_method = fs.fit_transform(X, y)
    # check against expected result
    # We use a different pca object to control the random_state stream
    assert_array_almost_equal(X_transformed[:, :-1], 10 * pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_array_almost_equal(X_fit_transformed[:, :-1],
                              10 * pca.fit_transform(X))
    assert_array_equal(X_fit_transformed[:, -1],
                       select.fit_transform(X, y).ravel())
    assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7))
Example 4: scikit_pca

def scikit_pca(model, rel_wds, plot_lims, title, cluster="kmeans"):
    """
    Given a word2vec model and a clustering method (choice of "kmeans" or "spectral"),
    make a plot of all word vectors in the model.
    """
    X, keys = make_data_matrix(model)
    for i, key in enumerate(keys):
        X[i,] = model[key]

    if cluster == "kmeans":
        k_means = KMeans(n_clusters=8)
        labels = k_means.fit_predict(X)
    elif cluster == "spectral":
        sp_clust = SpectralClustering()
        labels = sp_clust.fit_predict(X)

    # Standardize the features, then project onto the first two principal components.
    X_std = StandardScaler().fit_transform(X)
    sklearn_pca = PCA(n_components=2)
    X_transf = sklearn_pca.fit_transform(X_std)

    scatter_plot(X_transf[:, 0], X_transf[:, 1], rel_wds, labels, title, keys, plot_lims)

    return sklearn_pca.explained_variance_ratio_
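Standardizing before PCA, as in the example above, is a common pattern. A minimal sketch of the same scale-then-project step written as a single scikit-learn Pipeline follows; the variable names are illustrative and X is assumed to be any (n_samples, n_features) array:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# behaves like StandardScaler().fit_transform followed by PCA(n_components=2).fit_transform
scale_pca = make_pipeline(StandardScaler(), PCA(n_components=2))
X_transf = scale_pca.fit_transform(X)
variance = scale_pca.named_steps['pca'].explained_variance_ratio_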
Example 5: __init__

def __init__(self):
    super(RegressionDriver, self).__init__()
    if REGRESSOR == "LOG":
        self.driver = LogisticRegression()
    elif REGRESSOR == "RFR":
        self.driver = RandomForestRegressor(n_estimators=N_ESTIMATORS, n_jobs=N_JOBS)
    elif REGRESSOR == "GBR":
        self.driver = GradientBoostingClassifier(n_estimators=300, max_depth=5, learning_rate=0.05)
    elif REGRESSOR == "PCA":
        self.driver = PCA(n_components=1)
    else:
        raise Exception("Regressor: %s not supported." % REGRESSOR)

    genuineX = []
    forgeryX = []
    genuineY = []
    forgeryY = []

    # Training process
    for sigs in self.train_set:
        personTrain = PersonTraining(sigs)
        genuine, forgery = personTrain.calc_train_set()
        genuineX.extend(genuine)
        forgeryX.extend(forgery)

    # Labels that match the PCA/regression output: 0 means genuine, 1 means forgery.
    genuineY = [0.0] * len(genuineX)
    forgeryY = [1.0] * len(forgeryX)

    trainX = genuineX + forgeryX
    trainY = genuineY + forgeryY
    self.driver.fit(trainX, trainY)
Example 6: main

def main():
    inp = np.loadtxt('../../out_files/bivar_regress.txt', usecols=(1, 2, 3))
    X = inp[:, [1, 2]]
    ncomp = int(sys.argv[3])
    pca = PCA(n_components=ncomp)
    pca.fit(X)
    l = pca.transform(X)
    print("Doing a \t" + str(ncomp) + "\t component PCA \n\n----------------")

    # linear regression fit
    res = sm.OLS(inp[:, 0], l).fit()
    t2_new = float(sys.argv[1])
    err_t2_new = float(sys.argv[2])

    # array for 1000 realisations with slope and slope error -0.0264 and 0.004
    ar = np.array([(rn(-0.0264, 0.004) * rn(pca.transform([rn(t2_new, err_t2_new)]), 0.85) + rn(np.mean(inp[:, 0]), 0.07)) / rn(2.0, 0.3) for k in range(1000)])

    print("The estimated L_max is\t " + str(np.mean(ar)))
    print("The error from the PCA is\t " + str(np.std(ar)))
    print("Standard error on y mean is \t " + str(np.std(inp[:, 0]) / np.sqrt(len(inp[:, 0]))))
    print("Error by bootstrapping is \t" + str(np.std(boots(inp[:, 0]))))
Example 7: classification_level_SGDReg_pipeline

def classification_level_SGDReg_pipeline(classifications_DF):
    X = classifications_DF.iloc[:, 3:89]
    # assign the target (session length) to y and convert to float
    y_actual = classifications_DF.iloc[:, 2:3].astype(float)

    # scaling the data for feature selection
    X_scaled = preprocessing.scale(X)
    X_scaled_train, X_scaled_test, y_actual_train, y_actual_test = train_test_split(X_scaled, y_actual, test_size=0.5, random_state=0)

    pca_selection = PCA(n_components=2)
    X_features = pca_selection.fit_transform(X_scaled_train)
    SGDReg = SGDRegressor(alpha=0.0001)

    # Do grid search over n_components and SGDRegressor parameters:
    pipeline = Pipeline([('pca', pca_selection), ('SGDReg', SGDReg)])
    tuned_params = dict(pca__n_components=[5, 30, 40, 50],
                        SGDReg__alpha=[0.1, 0.01, 0.001, 0.0001, 0.00001],
                        SGDReg__l1_ratio=[.05, .15, .5, .7, .9, .95, .99, 1],
                        SGDReg__penalty=['l2', 'l1', 'elasticnet'])
    # note: newer scikit-learn versions spell this scoring as 'neg_mean_squared_error'
    grid_search = GridSearchCV(pipeline, param_grid=tuned_params, scoring='mean_squared_error', cv=3, verbose=10)
    grid_search.fit(X_scaled_train, y_actual_train['session_length'].values)
    print(grid_search.best_estimator_)

    y_true, y_pred = y_actual_test['session_length'].values, grid_search.best_estimator_.predict(X_scaled_test)
    print("Mean squared error:" + str(mean_squared_error(y_true, y_pred)))
    pd.DataFrame(y_true, y_pred).to_csv("SGDReg_pred_true.csv")
Example 8: cluster_kmeans

def cluster_kmeans():
    from sklearn.cluster import KMeans
    from sklearn.decomposition import PCA

    limit = 10000
    # X, real_labels = data_dict.get_training_set()
    filepath = '/home/wenjusun/bigdata/data/adult-income/adult.data'
    record_list = data_parser.parse_file_fetch_records(filepath, limit)
    X = np.array(data_parser.records_to_vector(record_list, enable_label=False))

    # Reduce the records to a single principal component before clustering.
    pca_estimator = PCA(n_components=1)
    X = pca_estimator.fit_transform(X)

    kmeans_model = KMeans(n_clusters=4).fit(X)
    labels = kmeans_model.labels_
    # print(kmeans_model.cluster_centers_)
    # print(labels[:100])

    print(len(X), len(labels))
    print(labels[:40])

    # print(array(real_labels))
    # count = 0
    # for xLabel, eLabel in zip(X[-1], labels):
    #     if xLabel == eLabel:
    #         count += 1
    #
    # print("count=%d,ratio:%f" % (count, 1.0 * count / len(labels)))
    # print(np.sum(labels))

    plt.figure(1)
    plt.scatter(X, labels)
    plt.show()
Example 9: reduced_dimension

def reduced_dimension(posture):
    i_user = 1
    session = 1
    while i_user <= 31:
        currentdirectory = os.getcwd()  # get the current directory
        parentdirectory = os.path.abspath(currentdirectory + "/../..")  # get the parent directory (2 levels up)
        path = parentdirectory + '/Output Files/Reduced Dimensional Dataset/Posture-' + str(posture) + '/GenuineUser' + str(i_user)
        if not os.path.exists(path):
            os.makedirs(path)
        while session <= 8:
            data = np.genfromtxt("../../Output Files/E2-Genuine User-Session Split/Posture-" + str(posture) + "/GenuineUser-" + str(i_user) + "/1-" + str(i_user) + "-" + str(posture) + "-" + str(session) + ".csv", dtype=float, delimiter=",")
            userinformation = data[:, [0, 1, 2, 3, 4]]
            sample_train = data[:, [5, 6, 7, 8, 9, 10, 11, 13, 15, 16, 17, 18, 19, 20, 21]]

            # Scale each feature to [0, 1], then keep the first 7 principal components.
            scaler = preprocessing.MinMaxScaler().fit(sample_train)
            sample_train_scaled = scaler.transform(sample_train)
            pca = PCA(n_components=7)
            sample_train_pca = pca.fit(sample_train_scaled).transform(sample_train_scaled)

            completedata = np.column_stack((userinformation, sample_train_pca))
            np.savetxt("../../Output Files/Reduced Dimensional Dataset/Posture-" + str(posture) + "/GenuineUser" + str(i_user) + "/1-" + str(i_user) + "-" + str(posture) + "-" + str(session) + ".csv", completedata, delimiter=',')
            session += 1
        session = 1
        i_user += 1
Developer: npalaska, Project: Leveraging_the_effect_of_posture_orientation_of_mobile_device_in_Touch-Dynamics, Lines: 29, Source file: Reduced_dimension.py
Example 10: sentence_to_vec

def sentence_to_vec(sentence_list: List[Sentence], embedding_size: int, a: float = 1e-3):
    sentence_set = []
    for sentence in sentence_list:
        vs = np.zeros(embedding_size)  # add all word2vec values into one vector for the sentence
        sentence_length = sentence.len()
        for word in sentence.word_list:
            a_value = a / (a + get_word_frequency(word.text))  # smooth inverse frequency, SIF
            vs = np.add(vs, np.multiply(a_value, word.vector))  # vs += sif * word_vector
        vs = np.divide(vs, sentence_length)  # weighted average
        sentence_set.append(vs)  # add to our existing re-calculated set of sentences

    # calculate PCA of this sentence set
    pca = PCA(n_components=embedding_size)
    pca.fit(np.array(sentence_set))
    u = pca.components_[0]  # the PCA vector
    u = np.multiply(u, np.transpose(u))  # u x uT

    # pad the vector? (occurs if we have fewer sentences than embedding_size)
    if len(u) < embedding_size:
        for i in range(embedding_size - len(u)):
            u = np.append(u, 0)  # add needed extension for multiplication below

    # resulting sentence vectors, vs = vs - u x uT x vs
    sentence_vecs = []
    for vs in sentence_set:
        sub = np.multiply(u, vs)
        sentence_vecs.append(np.subtract(vs, sub))
    return sentence_vecs
Example 11: pca

def pca(inF, MIN):
    df = pd.read_table(inF, header=0)
    dc = list(df.columns)
    dc[0] = 'GeneID'
    df.columns = dc
    print(df.shape)

    # keep only rows where at least one of the eight sample columns (2..9) reaches MIN
    sel = ~(df.iloc[:, 2:10] < MIN).all(axis=1)
    df = df.loc[sel, :]
    print(df.shape)

    X = df.iloc[:, 2:df.shape[1]].values.T
    y = df.columns[2:df.shape[1]].values
    X_std = StandardScaler().fit_transform(X)

    # pca = PCA(n_components=2)
    pca = PCA()
    Y_sklearn = pca.fit_transform(X_std)

    fig = plt.figure()
    plt.style.use('ggplot')
    # plt.style.use('seaborn-whitegrid')
    ax = fig.add_subplot(111)
    for lab, col in zip(y, ('red', 'red', 'green', 'green', 'blue', 'blue', 'm', 'm')):
        ax.scatter(Y_sklearn[y == lab, 0], Y_sklearn[y == lab, 1], label=lab, c=col, s=80)
    ax.set_xlabel('Principal Component 1 : %.2f' % (pca.explained_variance_ratio_[0] * 100) + '%')
    ax.set_ylabel('Principal Component 2 : %.2f' % (pca.explained_variance_ratio_[1] * 100) + '%')
    ax.legend(loc='lower right', prop={'size': 8})
    plt.tight_layout()
    plt.savefig(inF + '-RNASeq-MIN' + str(MIN) + '.pdf')
Example 12: feature_scaled_nn_acc

def feature_scaled_nn_acc(mds, type):
    train, validation = validation_split(mds)

    # Multiply by 1 to convert the bool column to int
    y_train = train['Up'] * 1
    X_train = train.drop('Up', axis=1)
    y_validation = validation['Up'] * 1
    X_validation = validation.drop('Up', axis=1)

    pre = PCA(n_components=19, whiten=True)
    X_train_pca = pre.fit_transform(X_train)
    # reuse the PCA fitted on the training data; do not refit on the validation set
    X_validation_pca = pre.transform(X_validation)

    model = create_model(X_train_pca.shape[1], type)

    # Convert to Keras format
    y_train = to_categorical(y_train.values)
    y_validation = to_categorical(y_validation.values)

    model.fit(X_train_pca, y_train, nb_epoch=5, batch_size=16)
    time.sleep(0.1)

    # Fit and guess
    guess_train = model.predict_classes(X_train_pca)
    guess_train = to_categorical(guess_train)
    guess_validation = model.predict_classes(X_validation_pca)
    guess_validation = to_categorical(guess_validation)

    train_acc = accuracy_score(y_train, guess_train)
    validation_acc = accuracy_score(y_validation, guess_validation)

    print("\n neural net train accuracy is {}".format(train_acc))
    print("\n neural net validation accuracy is {}".format(validation_acc))
    return guess_validation
Example 13: pca_project

def pca_project(vecs, n_components=2, whiten=False):
    pca = PCA(n_components=n_components, whiten=whiten)
    vecs_projected = pca.fit_transform(vecs)
    print("=== PCA projection ===")
    print(pca.explained_variance_ratio_)
    print("chosen explained: %.2f" % np.sum(pca.explained_variance_ratio_[:n_components]))
    return vecs_projected
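Related to the explained-variance report above: instead of fixing an integer number of components, scikit-learn also accepts a variance fraction for n_components. A short sketch, assuming vecs is any (n_samples, n_features) array:

from sklearn.decomposition import PCA

# keep as many components as needed to explain 95% of the variance
pca = PCA(n_components=0.95)
vecs_projected = pca.fit_transform(vecs)
print(pca.n_components_)                    # number of components actually kept
print(pca.explained_variance_ratio_.sum())  # at least 0.95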
Example 14: Ploting3D

def Ploting3D(data, n_dimension=3):
    pca = PCA(n_components=n_dimension)
    colors = ['r', 'g', 'b', 'm']
    labels = ['label_1', 'label_2', 'label_3', 'label_4']
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    idx = [0, len(data[0])]
    combined = np.array(data[0])
    # Combine all datasets into one matrix, remembering where each one starts and ends.
    for i in range(1, len(data)):
        combined = np.insert(combined, len(combined), data[i], axis=0)
        idx.append(idx[i] + len(data[i]))

    # Project the combined data onto the first three principal components and plot each dataset.
    combined = pca.fit_transform(combined)
    for i in range(len(data)):
        ax.scatter(combined[idx[i]:idx[i + 1], 0], combined[idx[i]:idx[i + 1], 1], combined[idx[i]:idx[i + 1], 2], c=colors[i], marker='o', s=70)

    ax.set_xlabel('1st_component')
    ax.set_ylabel('2nd_component')
    ax.set_zlabel('3rd_component')
    ax.set_xlim3d(-100, 100)
    ax.set_ylim3d(-60, 50)
    ax.set_zlim3d(-60, 50)
    plt.show()
Example 15: plot_2d_results

def plot_2d_results(X, y, preds):
    pca = PCA(n_components=2)
    X_r = pca.fit(X).transform(X)

    # Plot scatter
    plt.figure()
    cs = "cm"
    cats = [1, -1]
    target_names = ["positive", "negative"]
    for c, i, target_name in zip(cs, cats, target_names):
        plt.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name)
    plt.legend()
    plt.title("PCA of 2d data")
    plt.savefig("figures/data-scatter.png")

    # Plot mispredictions: mark each sample as correctly (1) or incorrectly (0) predicted
    plt.figure()
    diff = np.array([1 if y[i] == preds[i] else 0 for i in range(len(y))])
    cs = "rg"
    cats = [0, 1]
    target_names = ["incorrect", "correct"]
    for c, i, target_name in zip(cs, cats, target_names):
        plt.scatter(X_r[diff == i, 0], X_r[diff == i, 1], c=c, label=target_name)
    plt.legend()
    plt.title("PCA of correct/incorrect predictions")
    # plt.show()
    plt.savefig("figures/residual-scatter.png")