This article collects typical usage examples of the Python method sklearn.decomposition.IncrementalPCA.fit_transform. If you are wondering exactly what IncrementalPCA.fit_transform does, how to call it, or what it looks like in real code, the curated examples below should help. You can also look further into usage examples for the containing class, sklearn.decomposition.IncrementalPCA.
The following shows 14 code examples of IncrementalPCA.fit_transform, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: ipca
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def ipca():
    train_features, test_features = gf.get_tfidf()
    vectorizer = gf.get_tfidf()
    n_components = 250
    ipca = IncrementalPCA(n_components=n_components, batch_size=1250)
    start_time = time.time()
    print 'start ipca on train'
    X_ipca = ipca.fit_transform(train_features)
    runtime = time.time() - start_time
    print '-----'
    print '%.2f seconds to ipca on train' % runtime
    print '-----'
    train_features = None
    print 'ipca train done'
    np.savetxt('train_features.csv', X_ipca, fmt='%.8e', delimiter=",")
    X_ipca = None
    print 'ipca train file done'
    test_features = gf.get_tfidf(vectorizer, False)
    Y_ipca = ipca.fit_transform(test_features)
    test_features, vectorizer = None, None
    print 'ipca test done'
    np.savetxt('test_features.csv', Y_ipca, fmt='%.8e', delimiter=",")
    Y_ipca = None
    print 'ipca test file done'
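One caveat about this example: the test matrix is reduced with a second fit_transform call, which refits the PCA on the test data and projects the two sets onto different bases. The more common pattern is to fit once on the training data and only transform the test data; a minimal sketch with synthetic stand-ins for the TF-IDF matrices (shapes are made up):

import numpy as np
from sklearn.decomposition import IncrementalPCA

# Synthetic stand-ins for the train/test TF-IDF matrices used above.
train_features = np.random.rand(5000, 1000)
test_features = np.random.rand(2000, 1000)

ipca = IncrementalPCA(n_components=250, batch_size=1250)
X_reduced = ipca.fit_transform(train_features)  # fit on train and project it
Y_reduced = ipca.transform(test_features)       # project test with the same components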
Example 2: reduceDataset
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def reduceDataset(self,nr=3,method='PCA'):
    '''Reduces the dimensionality of a given dataset using different techniques provided by the sklearn library.
    Methods available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf','KPCApoly'
        'KPCAcosine','KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset=self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    #PCA
    if method=='PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    #Factor Analysis
    elif method=='FactorAnalysis':
        fa=FactorAnalysis(n_components=nr)
        reduced=fa.fit_transform(dataset)
    #kernel pca with rbf kernel
    elif method=='KPCArbf':
        kpca=KernelPCA(nr,kernel='rbf')
        reduced=kpca.fit_transform(dataset)
    #kernel pca with poly kernel
    elif method=='KPCApoly':
        kpca=KernelPCA(nr,kernel='poly')
        reduced=kpca.fit_transform(dataset)
    #kernel pca with cosine kernel
    elif method=='KPCAcosine':
        kpca=KernelPCA(nr,kernel='cosine')
        reduced=kpca.fit_transform(dataset)
    #kernel pca with sigmoid kernel
    elif method=='KPCAsigmoid':
        kpca=KernelPCA(nr,kernel='sigmoid')
        reduced=kpca.fit_transform(dataset)
    #Incremental PCA
    elif method=='IPCA':
        ipca=IncrementalPCA(nr)
        reduced=ipca.fit_transform(dataset)
    #Fast ICA
    elif method=='FastICAParallel':
        fip=FastICA(nr,algorithm='parallel')
        reduced=fip.fit_transform(dataset)
    elif method=='FastICADeflation':
        fid=FastICA(nr,algorithm='deflation')
        reduced=fid.fit_transform(dataset)
    elif method == 'All':
        self.dimensionalityReduction(nr=nr)
        return self
    self.ModelInputs.update({method:reduced})
    self.datasetsAvailable.append(method)
    return self
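The original file's import block is not shown; the names used above presumably come from sklearn.decomposition, roughly as follows (the PCA-as-sklearnPCA alias is an assumption based on the name):

from sklearn.decomposition import PCA as sklearnPCA
from sklearn.decomposition import FactorAnalysis, KernelPCA, IncrementalPCA, FastICA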
Example 3: get_pca_array
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def get_pca_array(list_chunks, topology):
    """
    Takes a list of mdtraj.Trajectory objects, featurizes them as pairwise
    distances between backbone alpha carbons, and performs 2-component
    Incremental PCA on the featurized trajectory.
    Parameters
    ----------
    list_chunks: list of mdtraj.Trajectory objects
    topology: str
        Name of the topology file
    Returns
    -------
    Y: np.array, shape (n_frames, 2)
    """
    pca = IncrementalPCA(n_components=2)
    top = md.load_prmtop(topology)
    ca_backbone = top.select("name CA")
    pairs = top.select_pairs(ca_backbone, ca_backbone)
    pair_distances = []
    for chunk in list_chunks:
        X = md.compute_distances(chunk, pairs)
        pair_distances.append(X)
    distance_array = np.concatenate(pair_distances)
    print("No. of data points: %d" % distance_array.shape[0])
    print("No. of features (pairwise distances): %d" % distance_array.shape[1])
    Y = pca.fit_transform(distance_array)
    return Y
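If the concatenated distance array itself is too large for memory, IncrementalPCA can also be fed chunk by chunk. A hedged variant of the function above (same mdtraj calls, but using partial_fit and a second pass with transform):

import numpy as np
import mdtraj as md
from sklearn.decomposition import IncrementalPCA

def get_pca_array_streaming(list_chunks, topology):
    pca = IncrementalPCA(n_components=2)
    top = md.load_prmtop(topology)
    ca_backbone = top.select("name CA")
    pairs = top.select_pairs(ca_backbone, ca_backbone)
    # First pass: update the PCA model one chunk at a time.
    for chunk in list_chunks:
        pca.partial_fit(md.compute_distances(chunk, pairs))
    # Second pass: project every chunk with the same fitted components.
    return np.concatenate([pca.transform(md.compute_distances(chunk, pairs))
                           for chunk in list_chunks])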
Example 4: dimensionalityReduction
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def dimensionalityReduction(self,nr=5):
    '''Applies all the dimensionality reduction techniques available in this class.
    Techniques available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf','KPCApoly'
        'KPCAcosine','KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset=self.ModelInputs['Dataset']
    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)
    fa=FactorAnalysis(n_components=nr)
    factors=fa.fit_transform(dataset)
    kpca=KernelPCA(nr,kernel='rbf')
    rbf=kpca.fit_transform(dataset)
    kpca=KernelPCA(nr,kernel='poly')
    poly=kpca.fit_transform(dataset)
    kpca=KernelPCA(nr,kernel='cosine')
    cosine=kpca.fit_transform(dataset)
    kpca=KernelPCA(nr,kernel='sigmoid')
    sigmoid=kpca.fit_transform(dataset)
    ipca=IncrementalPCA(nr)
    i_components=ipca.fit_transform(dataset)
    fip=FastICA(nr,algorithm='parallel')
    fid=FastICA(nr,algorithm='deflation')
    ficaP=fip.fit_transform(dataset)
    ficaD=fid.fit_transform(dataset)
    '''isomap=Isomap(n_components=nr).fit_transform(dataset)
    try:
        lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
    except ValueError:
        lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
    try:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
    except ValueError:
        lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset)
    try:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
    except ValueError:
        lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
    values=[p_components,factors,rbf,poly,cosine,sigmoid,i_components,ficaD,ficaP]#,isomap,lle1,lle2,lle3]
    keys=['PCA','FactorAnalysis','KPCArbf','KPCApoly','KPCAcosine','KPCAsigmoid','IPCA','FastICADeflation','FastICAParallel']#,'Isomap','LLE','LLEmodified','LLEltsa']
    self.ModelInputs.update(dict(zip(keys, values)))
    [self.datasetsAvailable.append(key) for key in keys]
    #debug
    #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    #dataset['Output']=self.ModelOutput
    #self.debug['Dimensionalityreduction']=dataset
    ###
    return self
Example 5: reduce_data
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def reduce_data(features, out_dir, dim=10, first_column=True):
    array = np.load(features)
    subarray = array
    if not first_column:
        subarray = array[:, 1:]
    ipca = IncrementalPCA(n_components=dim, copy=False, batch_size=500000)
    new_array = ipca.fit_transform(subarray)
    # when it cannot fit into memory do it incrementally like below
    # new_array_1 = tsvd.fit_transform(subarray[:1500000, :])
    # new_array_2 = tsvd.fit_transform(subarray[1500000:3400000, :])
    # new_array_3 = tsvd.fit_transform(subarray[3400000:, :])
    # new_array = np.vstack([new_array_1, new_array_2, new_array_3])
    if not first_column:
        new_array = np.c_[array[:, 0], new_array]
    assert new_array.shape[0] == array.shape[0]
    np.save(os.path.join(out_dir, os.path.basename(features) + "_pca"), new_array)
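The commented-out block above refits the reducer on each slice, which gives every slice its own projection. A hedged sketch of a genuinely incremental version of the same idea (hypothetical helper; assumes every block except possibly the last has at least dim rows):

import numpy as np
from sklearn.decomposition import IncrementalPCA

def reduce_in_blocks(subarray, dim=10, block=500000):
    ipca = IncrementalPCA(n_components=dim)
    # First pass: accumulate the PCA model block by block.
    for start in range(0, subarray.shape[0], block):
        chunk = subarray[start:start + block]
        if chunk.shape[0] >= dim:  # partial_fit needs at least n_components rows
            ipca.partial_fit(chunk)
    # Second pass: project every block onto the same components.
    return np.vstack([ipca.transform(subarray[start:start + block])
                      for start in range(0, subarray.shape[0], block)])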
Example 6: ipca
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def ipca(data, labels, new_dimension):
    print "start incremental pca..."
    if hasattr(data, "todense"):
        data = np.array(data.todense())
    start = time.time()
    pca = IncrementalPCA(n_components=new_dimension)
    reduced = pca.fit_transform(data)
    end = time.time()
    return (reduced, end-start)
Example 7: run_pca
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def run_pca(n_components,n_sites,order_dict,sim_mat):
    output_file = open('pca_100000_100','w')
    ipca = IncrementalPCA(n_components=n_components,batch_size=8000)
    sim_mat_ipca = ipca.fit_transform(sim_mat)
    var_sim_ipca = ipca.explained_variance_ratio_
    output_file.write(",".join(str(x) for x in var_sim_ipca)+'\n')
    for siteid in order_dict:
        stringa = ' '.join(
            [siteid,
             str(sim_mat_ipca[order_dict[siteid], 0]),
             str(sim_mat_ipca[order_dict[siteid], 1]),
             str(sim_mat_ipca[order_dict[siteid], 2]),
             str(sim_mat_ipca[order_dict[siteid], 3]),
             str(sim_mat_ipca[order_dict[siteid], 4]),
             str(sim_mat_ipca[order_dict[siteid], 5]),
             str(sim_mat_ipca[order_dict[siteid], 6])
            ])
        output_file.write(stringa +'\n')
    n_bins = 1000.
    binned = np.empty((n_sites,5)).astype(np.int32)
    for k in range(5):
        delta = (sim_mat_ipca[:, k].max()-sim_mat_ipca[:, k].min())/n_bins
        min_k = sim_mat_ipca[:, k].min()
        for i in range(n_sites):
            binned[i,k] = int((sim_mat_ipca[i, k]-min_k)/delta)
    f = open('pc_100000_100.csv','w')
    for siteid in order_dict:
        stringa = ' '.join(
            [siteid,
             str(binned[order_dict[siteid], 0]),
             str(binned[order_dict[siteid], 1]),
             str(binned[order_dict[siteid], 2]),
             str(binned[order_dict[siteid], 3]),
             str(binned[order_dict[siteid], 4])
            ])
        f.write(stringa +'\n')
    f.close()
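The manual binning loop can also be written with numpy alone; a hedged near-equivalent that keeps the 1000-bins-per-component discretisation (and, unlike the loop above, clips the maximum value into the last bin):

import numpy as np

def bin_components(scores, n_bins=1000):
    binned = np.empty(scores.shape, dtype=np.int32)
    for k in range(scores.shape[1]):
        edges = np.linspace(scores[:, k].min(), scores[:, k].max(), n_bins + 1)
        binned[:, k] = np.clip(np.digitize(scores[:, k], edges) - 1, 0, n_bins - 1)
    return binned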
Example 8: test_incremental_pca
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def test_incremental_pca():
    """Incremental PCA on dense arrays."""
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_transformed = ipca.fit_transform(X)
    np.testing.assert_equal(X_transformed.shape, (X.shape[0], 2))
    assert_almost_equal(ipca.explained_variance_ratio_.sum(),
                        pca.explained_variance_ratio_.sum(), 1)
    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]))
Example 9: PCASK
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
class PCASK(AbstractFeature):
    def __init__(self, n_components):
        AbstractFeature.__init__(self)
        self.n_components = n_components
        #for key in options:
        #    setattr(self,key,options[key])
    def compute(self,X,y):
        if X.ndim == 3:
            X = X.reshape((X.shape[0],X.shape[1]*X.shape[2]))
        self.ipca = IncrementalPCA(n_components=self.n_components, batch_size=None)
        return self.ipca.fit_transform(X)
    def extract(self,X):
        if X.ndim == 2:
            X = X.reshape((X.shape[0]*X.shape[1]))
        return list(self.ipca.transform([X])[0])
    def __repr__(self):
        return "PCASK"
Example 10: load_iris
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
# Authors: Kyle Kastner
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, IncrementalPCA
iris = load_iris()
X = iris.data
y = iris.target
n_components = 2
ipca = IncrementalPCA(n_components=n_components, batch_size=10)
X_ipca = ipca.fit_transform(X)
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X)
colors = ['navy', 'turquoise', 'darkorange']
for X_transformed, title in [(X_ipca, "Incremental PCA"), (X_pca, "PCA")]:
    plt.figure(figsize=(8, 8))
    for color, i, target_name in zip(colors, [0, 1, 2], iris.target_names):
        plt.scatter(X_transformed[y == i, 0], X_transformed[y == i, 1],
                    color=color, lw=2, label=target_name)
    if "Incremental" in title:
        err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()
        plt.title(title + " of iris dataset\nMean absolute unsigned error "
Example 11: main
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def main(date, takeSubset=False):
    """
    Reduces the dimensionality of the training data to 3 dimensions and
    plots the transformed data in 3D space. The idea is to bring
    out separability between the resistance classes which may be
    hidden in the dimensionality of the data.
    :param date: (string) Data collection date YYYY_MMDD
    :param takeSubset: (boolean) Transform and plot a random subset of
                       the training data?
    :return: (None)
    """
    mkl.set_num_threads(8)
    # Load the training and testing data into memory
    trainX, trainY = FileIO.loadTrainingData(date)
    if takeSubset:
        indices = np.random.choice(range(0, len(trainY)), size=NUM_SAMPLES, replace=False)
        X = trainX[indices,:]
        y = trainY[indices]
    else:
        X = trainX
        y = trainY
    X = np.nan_to_num(X)
    # Break the data into resistance classes
    susIndex = Constants.LABEL_TO_INDEX[Constants.SUSCEPTIBLE]
    drIndex = Constants.LABEL_TO_INDEX[Constants.DR_RESISTANT]
    grIndex = Constants.LABEL_TO_INDEX[Constants.GR_RESISTANT]
    susX = X[y==susIndex, :]
    drX = X[y==drIndex, :]
    grX = X[y==grIndex, :]
    # Transform the data using PCA
    pca = IncrementalPCA(n_components=6)
    pointsSUS = pca.fit_transform(susX)
    pointsGR = pca.fit_transform(grX)
    pointsDR = pca.fit_transform(drX)
    # Plot the transformed data in 3D space
    traceSUS = go.Scatter3d(
        x=pointsSUS[:, 0],
        y=pointsSUS[:, 1],
        z=pointsSUS[:, 2],
        mode='markers',
        marker=dict(
            size=5,
            line=dict(
                color='rgba(255, 0, 0, 0)',
                width=0.1
            ),
            opacity=0
        )
    )
    traceDR = go.Scatter3d(
        x=pointsDR[:, 0],
        y=pointsDR[:, 1],
        z=pointsDR[:, 2],
        mode='markers',
        marker=dict(
            size=5,
            line=dict(
                color='rgba(0, 255, 0, 0)',
                width=0.1
            ),
            opacity=0
        )
    )
    traceGR = go.Scatter3d(
        x=pointsGR[:, 0],
        y=pointsGR[:, 1],
        z=pointsGR[:, 2],
        mode='markers',
        marker=dict(
            size=5,
            line=dict(
                color='rgba(0, 0, 255, 0)',
                width=0.1
            ),
            opacity=0
        )
    )
    data = [traceSUS, traceDR, traceGR]
    fig = go.Figure(data=data)
    py.iplot(fig, filename='3D PCA Wavelength Plot')
    # Plot the principal components
    eigenSpectra = pca.components_
    plt.subplot(3,1,1)
    plt.plot(Constants.WAVELENGTHS, eigenSpectra[0, :])
#......... the rest of this example's code is omitted .........
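One thing to note about the projection step in this example: each resistance class is reduced with its own fit_transform call, so the three scatter traces live in different PCA bases. A hedged variant that keeps a single shared basis (reusing the X, susX, drX and grX arrays defined above):

pca = IncrementalPCA(n_components=6)
pca.fit(X)                       # fit once on all samples
pointsSUS = pca.transform(susX)  # then project each class with the same components
pointsDR = pca.transform(drX)
pointsGR = pca.transform(grX)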
示例12: print
# 需要导入模块: from sklearn.decomposition import IncrementalPCA [as 别名]
# 或者: from sklearn.decomposition.IncrementalPCA import fit_transform [as 别名]
# Time =
# Data decomposition
print("Now Decompositing Data")
start_time = time.clock()
#from sklearn.decomposition import TruncatedSVD
#decomp = TruncatedSVD(n_components=1000,n_iter=5)
#decomp.fit(train_data)
train_data = pca.fit_transform(train_data)
end_time = time.clock()
print("Decompositing Complete \nTime =", end_time - start_time)
# Time =
print(train_data)
# Saving decomposed data as csv
csv_decomp_train_path = 'csv_pca900decomp_alphabets_train.csv'
with open( csv_decomp_train_path, 'w') as f:
Example 13: fit_pca
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
def fit_pca(self, matrix):
    """Fit PCA on the matrix and save the sklearn model."""
    reducer = IncrementalPCA(n_components=800, batch_size=2500)
    reduced_matrix = reducer.fit_transform(matrix)
    self.rev_matrix_pca = reduced_matrix
    self.pca_model = reducer
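A brief hedged follow-up (the surrounding class is not shown): once fit_pca has run, later matrices with the same number of columns can be projected onto the saved 800 components without refitting, e.g.

# Sketch, assuming `obj` is an instance of the class above and `new_matrix`
# has the same number of columns as the original matrix.
obj.fit_pca(matrix)
new_reduced = obj.pca_model.transform(new_matrix)  # project onto the stored components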
Example 14: enumerate
# Required import: from sklearn.decomposition import IncrementalPCA [as alias]
# Or: from sklearn.decomposition.IncrementalPCA import fit_transform [as alias]
import json
import numpy as np
from gensim.models import Word2Vec
from sklearn.decomposition import IncrementalPCA
# from bhtsne import tsne
WORD2VEC_MODEL = 'GNews.model'
WORD2VEC_JSON = 'word2vec.json'
model = Word2Vec.load(WORD2VEC_MODEL)
words = []
vectors = np.empty((len(model.vocab.keys()), 300))
# vectors = np.empty((6, 300))
# for i, w in enumerate(['email', 'password', 'user', 'date', 'this', 'is']):
for i, w in enumerate(model.vocab.keys()):
    words.append(w)
    vectors[i] = model[w]
# vectors = tsne(vectors, dimensions=3, perplexity=50)
ipca = IncrementalPCA(n_components=2, batch_size=25000)
vectors = ipca.fit_transform(vectors)
json_vectors = {}
for i, w in enumerate(words):
    json_vectors[w] = vectors[i].tolist()
with open(WORD2VEC_JSON, 'w') as f:
    json.dump(json_vectors, f)