This article collects typical usage examples of the Python method sklearn.decomposition.SparsePCA.fit_transform. If you are wondering how to use SparsePCA.fit_transform in Python, or are looking for concrete examples of it in action, the curated code samples below may help. You can also explore further usage examples of the class it belongs to, sklearn.decomposition.SparsePCA.
The following presents 12 code examples of SparsePCA.fit_transform, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
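Before the collected examples, here is a minimal self-contained sketch of the basic call pattern; the array X and all parameter values are invented for illustration:

import numpy as np
from sklearn.decomposition import SparsePCA

# Synthetic data: 50 samples with 20 features (illustrative only).
rng = np.random.RandomState(0)
X = rng.randn(50, 20)

# Fit the model and project X onto the sparse components in one step.
spca = SparsePCA(n_components=5, alpha=1.0, random_state=0)
X_reduced = spca.fit_transform(X)

print(X_reduced.shape)         # (50, 5)
print(spca.components_.shape)  # (5, 20)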
Example 1: test_correct_shapes
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
import numpy as np
from numpy.testing import assert_equal
from sklearn.decomposition import SparsePCA

def test_correct_shapes():
    rng = np.random.RandomState(0)
    X = rng.randn(12, 10)
    spca = SparsePCA(n_components=8, random_state=rng)
    U = spca.fit_transform(X)
    assert_equal(spca.components_.shape, (8, 10))
    assert_equal(U.shape, (12, 8))
    # test overcomplete decomposition (more components than features)
    spca = SparsePCA(n_components=13, random_state=rng)
    U = spca.fit_transform(X)
    assert_equal(spca.components_.shape, (13, 10))
    assert_equal(U.shape, (12, 13))
Example 2: test_transform_nan
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
def test_transform_nan():
    # Test that SparsePCA does not return NaN when a feature is zero
    # across all samples.
    # generate_toy_data and assert_false are helpers from sklearn's test suite.
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    Y[:, 0] = 0
    estimator = SparsePCA(n_components=8)
    assert_false(np.any(np.isnan(estimator.fit_transform(Y))))
Example 3: test_fit_transform_tall
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
from numpy.testing import assert_array_almost_equal

def test_fit_transform_tall():
    # generate_toy_data is a helper from sklearn's test suite.
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng)  # tall array
    spca_lars = SparsePCA(n_components=3, method="lars", random_state=rng)
    U1 = spca_lars.fit_transform(Y)
    spca_lasso = SparsePCA(n_components=3, method="cd", random_state=rng)
    U2 = spca_lasso.fit(Y).transform(Y)
    # Both solvers should produce the same projection.
    assert_array_almost_equal(U1, U2)
Example 4: test_scaling_fit_transform
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
from numpy.testing import assert_allclose

def test_scaling_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    # normalize_components was accepted by older scikit-learn releases and
    # has since been removed.
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=rng, normalize_components=True)
    results_train = spca_lars.fit_transform(Y)
    results_test = spca_lars.transform(Y[:10])
    assert_allclose(results_train[0], results_test[0])
Example 5: range
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
# Excerpt from a larger script: n_feat, fin, and fo are assumed to be
# defined earlier.
cnt = 0
feature = [[0 for i in range(0, n_feat)] for j in range(0, 120542)]  # 80362
for line in fin:
    a = line.split(" ")
    for i in range(2, n_feat):
        feature[cnt][i - 2] = float(a[i].split(":")[1])
    cnt += 1
print(cnt)
# print(feature[cnt - 1])
X = np.array(feature)
'''
pca = PCA(n_components=n_feat)
pca_result = pca.fit_transform(X)
'''
pca = SparsePCA(n_components=n_feat, alpha=0.6, n_jobs=2, max_iter=15)
pca_result = pca.fit_transform(X)
# print(pca_result[0])
cnt = 0
fin = open("data/feature/train_gh_97a", 'r')
for line in fin:
    a = line.split(" ")
    PCA_d = 50
    # write the transformed features back into the libsvm-style fields
    for i in range(0, PCA_d):
        a[i + 2] = str(i) + ":" + str(pca_result[cnt][i])
    ll = " ".join(a[0:PCA_d + 2])
    fo.write(ll + "\n")
    cnt += 1
fo.close()
Example 6: SPCA
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
class SPCA(object):
    """
    Wrapper for the sklearn SparsePCA class. Performs sparse PCA.

    SPCA has 5 methods:
    - fit(waveforms)
        update the class instance with a sparse PCA fit
    - fit_transform(waveforms)
        do what fit() does, but additionally return the projection onto
        the sparse PCA space
    - inverse_transform(A)
        invert the decomposition; returns waveforms for an input A, using Z
    - get_basis()
        returns the basis vectors Z^\dagger
    - get_params()
        returns metadata used for fits.
    """

    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 alpha=0.1,
                 ridge_alpha=0.01,
                 max_iter=2000,
                 tol=1e-9,
                 n_jobs=1,
                 random_state=None):
        self._decomposition = 'Sparse PCA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._ridge_alpha = ridge_alpha
        self._n_jobs = n_jobs
        self._max_iter = max_iter
        self._tol = tol
        self._random_state = random_state
        self._SPCA = SparsePCA(n_components=self._num_components,
                               alpha=self._alpha,
                               ridge_alpha=self._ridge_alpha,
                               n_jobs=self._n_jobs,
                               max_iter=self._max_iter,
                               tol=self._tol,
                               random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._SPCA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._SPCA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert the projection back to waveforms using the fit
        new_waveforms = self._SPCA.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._SPCA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decomposition'] = self._decomposition
        return params

    def get_basis(self):
        """Return the SPCA basis vectors (Z^\dagger)."""
        Zt = self._SPCA.components_
        return Zt
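A minimal usage sketch of this wrapper, assuming the class above is in scope; the waveform array and all parameter values are synthetic, invented for illustration:

import numpy as np

# Synthetic stand-in for real waveforms: 100 samples of length 256.
waveforms = np.random.RandomState(0).randn(100, 256)

spca = SPCA(num_components=5, catalog_name='demo')
A = spca.fit_transform(waveforms)   # (100, 5) projection onto the sparse basis
Zt = spca.get_basis()               # (5, 256) basis vectors
print(spca.get_params()['num_components'])  # 5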
Example 7: print
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
# Excerpt from a larger script: count, n, images, subject, and filename are
# assumed to be defined earlier, with os, np, re, time, and skimage.io (io)
# imported. The loop header below is an assumption; the original excerpt
# begins mid-loop.
for f in os.listdir("sparse-cats"):
    count += 1
    if count > n:
        break
    try:
        cat = io.imread("sparse-cats/" + f, as_grey=True).flatten()
        cat.shape = (40000, 1)
        images = np.append(images, cat, axis=1)
    except:
        count -= 1
        continue
print("loaded cats...")
tic = time.clock()
print("starting learning...")
pca = SparsePCA(n_components=n, max_iter=1000)
x = pca.fit_transform(images, subject)  # the second argument is ignored by fit_transform
print("learning done...")
toc = time.clock()
print(x)
out = np.zeros(40000)
print("starting transform...")
for i in range(40000):
    for j in range(n):
        # out[i] += (x[i, j])
        out[i] += (images[i, j] * x[i, j])
out.shape = (200, 200)
print(out)
name = re.match("people/([a-z]*)_small.jpg", filename).group(1)
io.imsave("pca/pca_cat_{0}_{1}.jpg".format(n, name), out)
Example 8: textSimilarity
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
def textSimilarity():
    # GEOTEXT_HOME, userTextDirectory, and encoding are assumed to be
    # defined at module level.
    NeighborDirectory = GEOTEXT_HOME
    # matplotlib.use('Agg')
    DATA_FOLDER = userTextDirectory
    # DATA_FOLDER = "/GEOTEXT_HOME/af/Downloads/review_polarity/txt_sentoken"
    K_FOLD = 10
    data_target = load_files(DATA_FOLDER, encoding=encoding)
    filenames = data_target.filenames
    DO_PCA = True
    DO_SPARSEPCA = False
    Reduction_D = 100
    DO_SVD = False
    categories = data_target.target_names
    DO_NMF = False

    def size_mb(docs):
        return sum(len(s.encode(encoding)) for s in docs) / 1e6

    data_size_mb = size_mb(data_target.data)
    print("%d documents - %0.3fMB (all data set)" % (
        len(data_target.data), data_size_mb))
    print("%d categories" % len(categories))
    print()
    # split a training set and a test set
    target = data_target.target
    print("Extracting features from all the dataset using a sparse vectorizer")
    t0 = 0
    vectorizer = TfidfVectorizer(use_idf=True, norm='l2', binary=False,
                                 sublinear_tf=True, min_df=2, max_df=0.2,
                                 ngram_range=(1, 1), stop_words='english')
    # vectorizer = CountVectorizer(min_df=2, max_df=1.0, ngram_range=(1, 4))
    # the output of fit_transform (data) is a sparse CSR matrix.
    data = vectorizer.fit_transform(data_target.data)
    print(data.dtype)
    data = csr_matrix(data, dtype=np.float32)
    print(data.dtype)
    duration = 1
    print("done in %fs at %0.3fMB/s" % (duration, data_size_mb / duration))
    print("n_samples: %d, n_features: %d" % data.shape)
    print()
    if DO_PCA:
        print("dimension reduction pca with d=%d" % Reduction_D)
        pca = PCA(n_components=Reduction_D, copy=True, whiten=False)
        print(type(data))
        data = pca.fit_transform(data.todense())
    if DO_SPARSEPCA:
        print("dimension reduction sparsepca with d=%d" % Reduction_D)
        spca = SparsePCA(Reduction_D)
        data = spca.fit_transform(data.toarray())
    if DO_SVD:
        print("dimension reduction svd with d=%d" % Reduction_D)
        svd = TruncatedSVD(n_components=Reduction_D, algorithm="randomized",
                           n_iter=5, random_state=None, tol=0)
        data = svd.fit_transform(data)
    if DO_NMF:
        print("dimension reduction nmf with d=%d" % Reduction_D)
        nmf = NMF(n_components=Reduction_D)
        data = nmf.fit_transform(data)
    DO_CHI = False
    if DO_CHI:
        print("Extracting best features by a chi-squared test")
        ch2NumFeatures = 1000
        ch2 = SelectKBest(chi2, k=ch2NumFeatures)
        # print(vectorizer.get_stop_words())
        data = ch2.fit_transform(data, target)
    # print(data)
    KNN = 10
    nn = NearestNeighbors(n_neighbors=KNN + 1, algorithm='ball_tree').fit(data)
    # query and data are the same, so every node counts itself as its own
    # most similar neighbor here
    distances, indices = nn.kneighbors(data)
    with codecs.open(path.join(NeighborDirectory, 'neighbors.txt'), 'w', encoding) as outf:
        nodeIndex = -1
        nodeNeighbors = []
        for neighbors in indices:
            nodeIndex += 1
            outf.write(path.basename(filenames[nodeIndex]) + ' ')
            for neighbor in neighbors:
                if neighbor == nodeIndex:
                    continue
                else:
                    outf.write(path.basename(filenames[neighbor]) + ' ')
            outf.write('\n')
Example 9: range
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
# Excerpt from a larger script: offset_list, upto_list, sparsePCA_alpha, and
# O are assumed to be defined earlier, with time and matplotlib.pyplot (plt)
# imported.
n = 1
for FrameRange_ind in range(len(offset_list)):
    for sparsePCA_alpha_ind in sparsePCA_alpha:
        # for sparsePCA_ridge_alpha_ind in sparsePCA_ridge_alpha:
        # compute sparse PCA
        ncomp = 5
        offset = offset_list[FrameRange_ind]
        upto = upto_list[FrameRange_ind]
        # if ~upto:
        #     upto = O.Shapes().shape[0]
        PCA_start = time.time()
        p = SparsePCA(n_components=ncomp, alpha=sparsePCA_alpha_ind, ridge_alpha=0.01)
        PCA_end = time.time()
        print("The " + str(n) + " PCA time: " + str(PCA_end - PCA_start))
        Projection_start = time.time()
        scorePCA = p.fit_transform(O.Shapes()[offset:upto, :].T).T
        Projection_end = time.time()
        print("The " + str(n) + " Projection time: " + str(Projection_end - Projection_start))
        # explained_variance_ratio = p.explained_variance_ratio_
        plt.figure(1)
        plt.plot(p.components_.T)
        plt.legend(range(5))
        plt.savefig("princomp/" + str(offset) + "to" + str(upto) + "_alpha" + str(sparsePCA_alpha_ind) + ".png", bbox_inches='tight')
        plt.clf()
        plt.figure(2)
        plt.scatter(scorePCA[0, :10000], scorePCA[1, :10000], s=4)
        plt.savefig("scatter/" + str(offset) + "to" + str(upto) + "_alpha" + str(sparsePCA_alpha_ind) + ".png", bbox_inches='tight')
        plt.clf()
        m = 1
Example 10: transform
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
def transform(xTrain, yTrain, xTest):
    pca = SparsePCA(n_components=2)
    # yTrain is accepted but ignored by SparsePCA.fit_transform
    newXTrain = pca.fit_transform(xTrain, yTrain)
    newXTest = pca.transform(xTest)
    return newXTrain, newXTest
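A hypothetical call of this helper with synthetic arrays (all names and shapes invented for illustration); SparsePCA.fit_transform accepts y only for API compatibility and ignores it:

import numpy as np

rng = np.random.RandomState(0)
xTrain = rng.randn(80, 12)      # synthetic training features
yTrain = rng.randint(0, 2, 80)  # passed through but unused
xTest = rng.randn(20, 12)

newXTrain, newXTest = transform(xTrain, yTrain, xTest)
print(newXTrain.shape, newXTest.shape)  # (80, 2) (20, 2)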
Example 11: print
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
import sys
import numpy as np
from sklearn.decomposition import SparsePCA

# csv = "c:/iris44.csv"  # wikipedia Iris_flower_data_set
# 5.1,3.5,1.4,0.2  # ,Iris-setosa ...
N = 40
K = 450000
seed = 1
exec("\n".join(sys.argv[1:]))  # allows overriding N=..., seed=..., csv=... from the command line
np.random.seed(seed)
np.set_printoptions(1, threshold=100, suppress=True)  # .1f
try:
    A = np.genfromtxt(csv, delimiter=",")
    N, K = A.shape
except IOError:
    print('error')
    A = np.random.normal(size=(N, K))  # gen correlated ?
print(len(A[1]), N, K)
print("A:", A)
# pca = PCA(n_components=4)
pca = SparsePCA(n_components=None, alpha=1, ridge_alpha=0.01, max_iter=1000,
                tol=1e-08, method='lars', n_jobs=1, U_init=None, V_init=None,
                verbose=False, random_state=None)
scores = pca.fit_transform(A)
# Unlike PCA, SparsePCA exposes no explained_variance_ratio_ attribute,
# so the following lines are left disabled.
# pca_variance = pca.explained_variance_ratio_
coeff = pca.components_
# A1 = pca.inverse_transform(coeff)
# print(pca_variance)
print("coeff", coeff)
# score = pca.transform(A)
print("score", scores)
# print(A1)
Example 12: sparse_pca
# Required import: from sklearn.decomposition import SparsePCA [as alias]
# Or: from sklearn.decomposition.SparsePCA import fit_transform [as alias]
def sparse_pca(self, n_components, alpha):
    # pd (pandas) is assumed imported; self.X and self.crimes are set elsewhere.
    pca = SparsePCA(n_components=n_components, alpha=alpha)
    self.X = pca.fit_transform(self.X)
    self.df_c = pd.DataFrame(pca.components_.T, index=self.crimes,
                             columns=list(range(1, n_components + 1)))
    return self.df_c