This article collects typical usage examples of sklearn.decomposition.PCA.n_components in Python. If you are unsure what PCA.n_components is for or how to use it (it is a constructor parameter that is also exposed as an instance attribute), the curated code examples below may help; you can also explore the containing class, sklearn.decomposition.PCA, for further usage.
Shown below are 13 code examples of PCA.n_components, sorted by popularity by default.
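Before the examples, a minimal orientation sketch (the iris data here is chosen purely for illustration): n_components can be passed to the PCA constructor, or reassigned on an existing instance before the next fit.

from sklearn import datasets
from sklearn.decomposition import PCA

X = datasets.load_iris().data

# Pass n_components to the constructor...
pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)   # shape (150, 2)

# ...or reassign the attribute and refit the same instance.
pca.n_components = 3
X_3d = pca.fit_transform(X)   # shape (150, 3)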
Example 1: test_pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
from sklearn import datasets
from sklearn.decomposition import PCA

iris = datasets.load_iris()

def test_pca():
    """PCA on dense arrays."""
    pca = PCA(n_components=2)
    X = iris.data
    X_r = pca.fit(X).transform(X)
    np.testing.assert_equal(X_r.shape[1], 2)

    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    pca = PCA()
    pca.fit(X)
    assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)

    X_r = pca.transform(X)
    X_r2 = pca.fit_transform(X)
    assert_array_almost_equal(X_r, X_r2)

    # Test get_covariance and get_precision with n_components == n_features,
    # with n_components < n_features, and with n_components == 0.
    for n_components in [0, 2, X.shape[1]]:
        pca.n_components = n_components
        pca.fit(X)
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)
Example 2: compute_scores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
from sklearn.decomposition import PCA

def compute_scores(X):
    pca = PCA()
    pca_scores = []
    for n in n_components:  # `n_components` is an iterable from the enclosing scope
        pca.n_components = n
        pca.fit(X)
        pca_scores.append(pca.explained_variance_ratio_)
        # pca_scores.append(np.mean(cross_val_score(pca, X)))
    return pca_scores
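Example 2 reads n_components from its enclosing scope. A minimal driver sketch, with the data matrix and candidate range invented purely for illustration:

import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 10)       # hypothetical data matrix
n_components = np.arange(1, 10)   # candidate dimensions (the assumed global)

scores = compute_scores(X_demo)
for n, ratio in zip(n_components, scores):
    print(n, ratio.sum())         # cumulative explained variance for each n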
Example 3: compute_scores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.model_selection import cross_val_score

def compute_scores(X):
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:  # `n_components` is an iterable from the enclosing scope
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
    return pca_scores, fa_scores
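Given the cross-validated scores, selecting the model dimensionality is a one-line argmax; a short follow-up sketch, assuming X and n_components are already in scope:

import numpy as np

pca_scores, fa_scores = compute_scores(X)
n_best_pca = n_components[np.argmax(pca_scores)]
n_best_fa = n_components[np.argmax(fa_scores)]
print('best n for PCA: %d, for FactorAnalysis: %d' % (n_best_pca, n_best_fa))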
Example 4: reduceDimensionalityToTwo
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
from sklearn.decomposition import PCA

def reduceDimensionalityToTwo(matrix):
    """
    Reduces the dimensionality to 2 via PCA, for visualisation.

    :param matrix: (NumPy array) The feature matrix.
    :return: coordinates (NumPy array) - Array containing the x and y coordinates.
    """
    pca = PCA()
    pca.n_components = 2
    coordinates = pca.fit_transform(matrix)
    return coordinates
Example 5: compute_scores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.model_selection import cross_val_score

def compute_scores(X, n_components):
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:
        print('Processing dimension {}'.format(n))
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
    return pca_scores, fa_scores
Example 6: compute_scores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.model_selection import cross_val_score

def compute_scores(X, n_components):
    """
    This is the "y" data of the plots -- the CV scores.
    """
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
    return pca_scores, fa_scores
Example 7: computeScores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import sys
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

def computeScores(self, X):
    """
    Computes the scores for a given X feature matrix, considering various
    numbers of components.
    """
    pca = PCA()
    pca_scores = []
    for n in self.n_components:
        print("Computing score for", n, "components")
        sys.stdout.flush()
        pca.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
    return pca_scores
Example 8: compute_scores
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import time
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.model_selection import cross_val_score

def compute_scores(X, n_components):
    pca = PCA()
    fa = FactorAnalysis()
    pca_scores, fa_scores = [], []
    for n in n_components:
        start = time.time()
        pca.n_components = n
        fa.n_components = n
        pca_scores.append(np.mean(cross_val_score(pca, X)))
        fa_scores.append(np.mean(cross_val_score(fa, X)))
        end = time.time()
        print('PCA scores (%3d)' % n, pca_scores)
        print('FA scores  (%3d)' % n, fa_scores)
        print('TIME: ', end - start)
    return pca_scores, fa_scores
Example 9: pca
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import numpy as np
from sklearn.decomposition import PCA

def pca(matriztfxidf):
    normalizaTFXIDF(matriztfxidf)  # external normalisation helper, defined elsewhere
    vetor = []
    # find the smallest number of components that explains at least 95% of the variance
    for i in range(1, len(matriztfxidf[0]) + 1):
        num_pca_components = i
        pca = PCA(num_pca_components)
        pca.fit(matriztfxidf)
        # stored in a list to build plots
        # vetor.append(sum(pca.explained_variance_ratio_))
        if sum(pca.explained_variance_ratio_) >= 0.95:
            #print("Number of columns:", len(np.transpose(matriztfxidf)))
            #print("Number of rows:", len(matriztfxidf))
            #print("Number of PCA components (95%):", num_pca_components)
            #print("Components:")
            #print(pca.explained_variance_ratio_)
            pca.n_components = num_pca_components
            matrizReduzida = pca.fit_transform(matriztfxidf)
            #print("Shape of the reduced matrix:", matrizReduzida.shape)
            #print("New reduced matrix:", matrizReduzida)
            break
    #inverso = pca.inverse_transform(matrizReduzida)
    #print("INVERSE")
    #print(inverso)
    #return matrizReduzida
    return num_pca_components
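Worth noting: recent scikit-learn versions can do this search internally. Passing a float between 0 and 1 as n_components keeps just enough components to explain that fraction of the variance, so the manual loop above collapses to a sketch like this (reusing the matriztfxidf matrix from Example 9):

from sklearn.decomposition import PCA

pca = PCA(n_components=0.95)          # keep >= 95% of the variance
matrizReduzida = pca.fit_transform(matriztfxidf)
print(pca.n_components_)              # number of components actually kept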
Example 10: open
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold

# (X_train, Y_train, regr and prediction come from earlier parts of the omitted script)
# #open file and write the results into a csv file
# myfile = open('results2.csv', 'wb')
# wr = csv.writer(myfile, dialect='excel')
# #wr.writerows(prediction)
# for row in prediction:
#     wr.writerow([row])

####################################### With PCA #########################
# perform PCA
pca = PCA(n_components=22)
pca.fit(X_train)
print(pca.explained_variance_ratio_)
# from the variance ratios we choose the first 15 components
pca.n_components = 15
X_reduced = pca.fit_transform(X_train)
X_reduced.shape
# split the training data into separate folds
# (rewritten for the current KFold API in sklearn.model_selection)
kf = KFold(n_splits=2)
score_model1 = []
# iterate over the folds
for train_index, test_index in kf.split(X_reduced):
    # fit the training data
    regr.fit(X_reduced[train_index], Y_train[train_index])
    # score with the test data that we got from the k-fold
Example 11: main
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import re
import MySQLdb
import MySQLdb.cursors
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression as LinR  # the LinR alias is inferred from usage

def main():
    conn = MySQLdb.connect(
        user="root",
        passwd="",
        db="Player_Team_Data",
        cursorclass=MySQLdb.cursors.DictCursor)
    # In[97]:
    # read in AllPlayerNames .csv from basketball-reference
    df_all_players = pd.read_csv('~/Insight/Players100.csv')
    # remove rows that were separated by stray 'Player' entries
    df_all_players = df_all_players[df_all_players.Name != 'Player']
    name_list = list(df_all_players.values)
    new_name_list = []
    for name in name_list:
        # convert entries to strings
        name = str(name)
        new_name_list.append(name)
    unique_name_list = list(set(new_name_list))
    unique_name_list.sort()
    name_list_final = []
    for name in unique_name_list:
        match = re.search(r'([\w\.\-\']+) ([\w\.\-]+)', name)
        prename = match.group(1).lower()[1:] + ' ' + match.group(2).lower()
        name_list_final.append(prename)
    name_list_fin = [x for x in name_list_final if x != 'george hill']
    accuracy_list = []
    for player in name_list_fin:
        print(player)
        cmd_Rk = 'SELECT Rk FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2015\');'
        df_Rk = pd.read_sql(cmd_Rk, con=conn)
        f_pre = []
        f_avg = []
        f_tru = []
        for Rki in df_Rk.Rk:
            Rk = str(Rki)
            print(Rk)
            cmd_target_2015 = 'SELECT PTS,3P,TRB,AST,STL,BLK,TOV FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2015\') AND Rk < ' + Rk + ' ;'
            cmd_target_2014 = 'SELECT PTS,3P,TRB,AST,STL,BLK,TOV FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2014\') AND Rk >= ' + Rk + ' ;'
            cmd_train_2015 = 'SELECT Rk,Home_Away,DateDiff,TeamID,Win,OPPG,OTPR,O3Ppercent,ORPG,OBPG,OSPG,DEF,O3PM,OFGpercent,OTPG,OAPG,TPG,SPG,TRBR,OBLKpercent FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2015\') AND Rk < ' + Rk + ';'
            cmd_train_2014 = 'SELECT Rk,Home_Away,DateDiff,TeamID,Win,OPPG,OTPR,O3Ppercent,ORPG,OBPG,OSPG,DEF,O3PM,OFGpercent,OTPG,OAPG,TPG,SPG,TRBR,OBLKpercent FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2014\') AND Rk >= ' + Rk + ';'
            cmd_operate = 'SELECT Rk,Home_Away,DateDiff,TeamID,Win,OPPG,OTPR,O3Ppercent,ORPG,OBPG,OSPG,DEF,O3PM,OFGpercent,OTPG,OAPG,TPG,SPG,TRBR,OBLKpercent FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2015\') AND Rk = ' + Rk + ';'
            cmd_truth = 'SELECT PTS,3P,TRB,AST,STL,BLK,TOV FROM NBA_player_data WHERE Player_Name IN (\'' + player + '\') AND Year IN (\'2015\') AND Rk = ' + Rk + ' ;'
            df_target_2015 = pd.read_sql(cmd_target_2015, con=conn)
            df_target_2014 = pd.read_sql(cmd_target_2014, con=conn)
            df_train_2015 = pd.read_sql(cmd_train_2015, con=conn)
            df_train_2014 = pd.read_sql(cmd_train_2014, con=conn)
            df_operate = pd.read_sql(cmd_operate, con=conn)
            df_truth = pd.read_sql(cmd_truth, con=conn)
            df_truth = df_truth.applymap(lambda x: float(x))
            df_target = pd.concat([df_target_2014, df_target_2015], ignore_index=True)
            df_train = pd.concat([df_train_2014, df_train_2015], ignore_index=True)
            df_target = df_target.applymap(lambda x: float(x))
            df_train = df_train.applymap(lambda x: float(x))
            df_target_2015 = df_target_2015.applymap(lambda x: float(x))
            df_inquire = df_operate.applymap(lambda x: float(x))
            df_train_plus_inquire = pd.concat([df_train, df_inquire])
            df_raw = df_train_plus_inquire.reindex()
            df_raw_scaled = df_raw.copy()
            df_raw_scaled = df_raw_scaled.applymap(lambda x: np.log(x))
            df_raw_transform = df_raw.copy()
            df_raw_scaled = df_raw_scaled.apply(lambda x: preprocessing.StandardScaler().fit(x).transform(x))
            df_raw_transform = df_raw_transform.apply(lambda x: preprocessing.StandardScaler().fit(x))
            pca = PCA()
            pca.fit(df_raw_scaled)
            pca.n_components = 7
            train_reduced = pca.fit_transform(df_raw_scaled)
            df_train_reduced = pd.DataFrame(train_reduced)
            df_evaluate = df_train_reduced.tail(1)
            df_train_scaled = df_train_reduced.iloc[:-1]
            # rf = RandomForestRegressor(n_estimators=100)
            # rf.fit(df_train_scaled, df_target)
            # predictions = rf.predict(df_evaluate).round()[0]
            PTS = LinR()
            PTS.fit(df_train_scaled, df_target.PTS)
            pPTS = PTS.predict(df_evaluate)
            REB = LinR()
            REB.fit(df_train_scaled, df_target.TRB)
            pREB = REB.predict(df_evaluate)
            AST = LinR()
            AST.fit(df_train_scaled, df_target.AST)
            pAST = AST.predict(df_evaluate)
            TP = LinR()
            TP.fit(df_train_scaled, df_target['3P'])
            pTP = TP.predict(df_evaluate)
#......... part of the code is omitted here .........
Example 12: genfromtxt
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
from numpy import genfromtxt
from sklearn import preprocessing
from sklearn.decomposition import PCA, KernelPCA
from src.Utility.ScatterWithHistPlot import ScatterWithHistPlot

__author__ = 'stanley'

nbaData = genfromtxt('../../NBA2012-15/Classification/NBA12_14.csv', delimiter=',')
label = nbaData[:, 0]
features = nbaData[:, 1:]

pca = PCA()
pca.fit(features)
# print out the variance ratios
print(pca.explained_variance_ratio_)

# plot the first 2 components
pca.n_components = 2
f_reduced = pca.fit_transform(features)
showGraph = ScatterWithHistPlot()
showGraph.plot(f_reduced, label)
Example 13: TfidfVectorizer
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import n_components [as alias]
import sys
import numpy as np
from sklearn.decomposition import PCA
from sklearn.feature_extraction.text import TfidfVectorizer

# The snippet starts inside a loop over raw card strings; `cards` and
# `cardnames` are built in the omitted part of the script.
    card = card.split(' ')
    cardT = []
    for w in card:
        if w != '' and w != '\r\n':
            cardT.append(w)
    cards.append(' '.join(cardT))

vectorizer = TfidfVectorizer(min_df=5, max_df=0.5, ngram_range=(1, 2))
X = vectorizer.fit_transform(cards).toarray()
n_components = np.arange(50, 80, 5)  # options for n_components
print(X.shape)
fa = PCA()
fa_scores = []
for n in n_components:
    print(n)
    sys.stdout.flush()
    fa.n_components = n
    fa.fit(X)
    fa_scores.append(fa.score(X))
    print('\t', fa_scores[-1])
fa.n_components = n_components[np.argmax(fa_scores)]
Y = fa.fit_transform(X)
for name, ii in zip(cardnames, range(len(Y))):
    print(name + '@' + '@'.join(str(v) for v in list(Y[ii, :])))