本文整理汇总了Python中sklearn.utils.extmath.randomized_svd函数的典型用法代码示例。如果您正苦于以下问题：Python randomized_svd函数的具体用法？Python randomized_svd怎么用？Python randomized_svd使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了randomized_svd函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_randomized_svd_power_iteration_normalizer
def test_randomized_svd_power_iteration_normalizer():
    """Compare un-normalized and normalized power iterations of randomized_svd.

    Without a normalizer, the Frobenius reconstruction error must move by more
    than 100 between 2 and 20 power iterations. With the 'LU', 'QR' and 'auto'
    normalizers it must stay within 15 of the 2-iteration error for 5, 10 and
    50 iterations.
    """
    # randomized_svd with power_iteration_normalizer='none' diverges for a
    # large number of power iterations on this dataset
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
    X += 3 * rng.randint(0, 2, size=X.shape)
    n_components = 50

    def frobenius_residual(**svd_kwargs):
        # Frobenius norm of X minus its truncated reconstruction.
        U, s, V = randomized_svd(X, n_components, **svd_kwargs)
        residual = X - U.dot(np.diag(s).dot(V))
        return linalg.norm(residual, ord='fro')

    # Check that it diverges with many (non-normalized) power iterations
    error_2 = frobenius_residual(n_iter=2, power_iteration_normalizer='none')
    error_20 = frobenius_residual(n_iter=20,
                                  power_iteration_normalizer='none')
    assert_greater(np.abs(error_2 - error_20), 100)

    # With a normalizer the error must stay stable as n_iter grows.
    for normalizer in ['LU', 'QR', 'auto']:
        error_2 = frobenius_residual(n_iter=2,
                                     power_iteration_normalizer=normalizer,
                                     random_state=0)
        for n_iter in [5, 10, 50]:
            error = frobenius_residual(n_iter=n_iter,
                                       power_iteration_normalizer=normalizer,
                                       random_state=0)
            assert_greater(15, np.abs(error_2 - error))
示例2: test_randomized_svd_low_rank
def test_randomized_svd_low_rank():
    """Check that extmath.randomized_svd agrees with linalg.svd.

    The input is a structured low-rank matrix generated with
    ``tail_strength=0.0``; both its dense form and its CSR form are checked.
    """
    n_samples, n_features = 100, 500
    rank, k = 5, 10

    # matrix of approximate effective rank `rank` with no noise component
    # (very structured signal)
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # reference decomposition: slow exact method
    exact_u, exact_s, exact_vt = linalg.svd(X, full_matrices=False)

    # fast approximate method
    approx_u, approx_s, approx_vt = randomized_svd(X, k)
    assert_equal(approx_u.shape, (n_samples, k))
    assert_equal(approx_s.shape, (k,))
    assert_equal(approx_vt.shape, (k, n_features))

    # singular values of both methods must match for the first k components
    assert_almost_equal(exact_s[:k], approx_s)

    # compare the singular vectors through their product, which is
    # insensitive to sign flips
    assert_almost_equal(np.dot(exact_u[:, :k], exact_vt[:k, :]),
                        np.dot(approx_u, approx_vt))

    # same check on the sparse matrix representation
    X_sparse = sparse.csr_matrix(X)
    _, sparse_s, _ = randomized_svd(X_sparse, k)
    assert_almost_equal(exact_s[:rank], sparse_s[:rank])
示例3: test_randomized_svd_sign_flip_with_transpose
def test_randomized_svd_sign_flip_with_transpose():
    """Check that sign flipping is decided from u, with or without transpose.

    See https://github.com/scikit-learn/scikit-learn/issues/5608
    for more details.
    """

    def max_loading_is_positive(u, v):
        """Return a pair of booleans ``(u_based, v_based)``.

        ``u_based`` is true when, in every column of ``u``, the largest
        absolute value equals the largest value; ``v_based`` is the same
        check applied to every row of ``v``.
        """
        u_based = (np.abs(u).max(axis=0) == u.max(axis=0)).all()
        v_based = (np.abs(v).max(axis=1) == v.max(axis=1)).all()
        return u_based, v_based

    mat = np.arange(10 * 8).reshape(10, -1)

    # Without transpose
    u_plain, _, v_plain = randomized_svd(mat, 3, flip_sign=True)
    u_ok, v_ok = max_loading_is_positive(u_plain, v_plain)
    assert_true(u_ok)
    assert_false(v_ok)

    # With transpose
    u_transposed, _, v_transposed = randomized_svd(
        mat, 3, flip_sign=True, transpose=True)
    u_ok, v_ok = max_loading_is_positive(u_transposed, v_transposed)
    assert_true(u_ok)
    assert_false(v_ok)
示例4: test_randomized_svd_transpose_consistency
def test_randomized_svd_transpose_consistency():
    """Check that transposing the design matrix has limited impact."""
    n_samples, n_features = 100, 500
    rank, k = 4, 10

    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # One approximate decomposition per transpose mode, in this order:
    # False, True, 'auto'.
    approximations = [
        randomized_svd(X, k, n_iter=3, transpose=mode, random_state=0)
        for mode in (False, True, 'auto')
    ]
    exact_u, exact_s, exact_vt = linalg.svd(X, full_matrices=False)

    # Singular values of every mode match the exact ones to 3 decimals.
    for _, approx_s, _ in approximations:
        assert_almost_equal(approx_s, exact_s[:k], decimal=3)

    # The U.V products of the explicit modes (False, True) match the exact
    # truncated product to 2 decimals.
    for approx_u, _, approx_vt in approximations[:2]:
        assert_almost_equal(np.dot(approx_u, approx_vt),
                            np.dot(exact_u[:, :k], exact_vt[:k, :]),
                            decimal=2)

    # in this case 'auto' is equivalent to transpose
    s_transposed = approximations[1][1]
    s_auto = approximations[2][1]
    assert_almost_equal(s_transposed, s_auto)
示例5: test_randomized_svd_low_rank_with_noise
def test_randomized_svd_low_rank_with_noise():
    """Check that extmath.randomized_svd can handle noisy matrices."""
    n_samples, n_features = 100, 500
    rank, k = 5, 10

    # matrix with a structure of approximate rank `rank` plus an important
    # noisy component
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # reference singular values from the slow exact method
    exact_s = linalg.svd(X, full_matrices=False)[1]

    # fast approximate method without the iterated power method:
    # the approximation does not tolerate the noise
    plain_s = randomized_svd(X, k, n_iter=0)[1]
    assert_greater(np.abs(exact_s[:k] - plain_s).max(), 0.05)

    # fast approximate method with the iterated power method:
    # the power iterations help getting rid of the noise
    powered_s = randomized_svd(X, k, n_iter=5)[1]
    assert_almost_equal(exact_s[:k], powered_s, decimal=3)
示例6: get_svd_learn_clusters
def get_svd_learn_clusters(accu_path, data=None, sing_threshold=2.0, assign_clstr=0.1, vis=False):
    """Run a randomized SVD twice and plot the resulting latent concepts.

    The first decomposition uses the maximum number of singular values
    (``min(data.shape)``). The second keeps only as many components as there
    were singular values strictly greater than ``sing_threshold``.

    Parameters
    ----------
    accu_path : str
        Path prefix handed to ``utils.screeplot`` / ``utils.genome``; when
        ``vis`` is true, ``accu_path + "/graphlets.p"`` is unpickled.
    data : 2-D matrix
        Matrix to decompose. Despite the ``None`` default it is required:
        ``data.shape`` is read unconditionally.
    sing_threshold : float
        Singular values must exceed this to be kept in the second pass.
    assign_clstr : float
        Currently unused; kept so existing callers keep working.
    vis : bool
        Forwarded to ``utils.screeplot``; when true, also prints, for every
        right singular vector, the graphlets whose weight exceeds 0.1.

    Returns
    -------
    (U, Sigma, VT) : tuple
        Result of the second (thresholded) ``randomized_svd`` call.
    """
    n_rows, n_cols = data.shape
    all_components = min(n_rows, n_cols)

    # First pass: only the singular values are needed, to count how many
    # exceed the threshold.
    _, Sigma, _ = randomized_svd(data, n_components=all_components, n_iter=5, random_state=None)
    best_components = int(np.count_nonzero(Sigma > sing_threshold))

    # Second pass restricted to the retained components.
    U, Sigma, VT = randomized_svd(data, n_components=best_components, n_iter=5, random_state=None)

    utils.screeplot(accu_path, Sigma, all_components, vis)

    # Common value range for the per-vector plots, seeded with 0 and 100 as
    # the initial upper and lower bounds.
    max_, min_ = 0, 100
    for row in VT:
        row_max, row_min = max(row), min(row)
        if row_max > max_:
            max_ = row_max
        if row_min < min_:
            min_ = row_min

    graphlets = None
    if vis:
        # Pickle data is binary: text mode fails on Python 3.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only use with trusted graphlets.p files.
        with open(accu_path + "/graphlets.p", 'rb') as fh:
            graphlets = pickle.load(fh)

    # Plot a graph for each right singular vector (VT).
    for i, vocabulary in enumerate(VT):
        title = 'Latent Concept %s' % i
        utils.genome(accu_path, vocabulary, [min_, max_], title)
        if vis:
            for c, v in enumerate(vocabulary):
                if v > 0.1:
                    # Single-argument print() is valid on Python 2 and 3.
                    print("\n%s %s" % (c, graphlets[c]))
    return U, Sigma, VT
示例7: _randomized_dpca
def _randomized_dpca(self,X,mXs,pinvX=None):
    """ Solves the dPCA minimization problem analytically by using a randomized SVD solver from sklearn.

    X is flattened to ``(X.shape[0], -1)``; when ``pinvX`` is not supplied it
    is computed as ``pinv`` of that flattened array. ``self.n_components`` may
    be a single value or a dict giving one value per key of ``mXs``.

    Returns
    -------
    P : dict mapping strings to array-like,
        Holds encoding matrices for each term in variance decompositions (used to transform data
        to low-dimensional space).

    D : dict mapping strings to array-like,
        Holds decoding matrices for each term in variance decompositions (used in inverse_transform
        to map from low-dimensional representation back to original data space).
    """
    n_features = X.shape[0]
    rX = X.reshape((n_features, -1))
    if pinvX is None:
        pinvX = pinv(rX)

    P, D = {}, {}
    for key in list(mXs.keys()):
        marginal = mXs[key].reshape((n_features, -1))  # called X_phi in paper
        C = np.dot(marginal, pinvX)
        projected = np.dot(C, rX)

        # n_components is either given per term (dict) or shared by all terms.
        if isinstance(self.n_components, dict):
            n_components = self.n_components[key]
        else:
            n_components = self.n_components

        U, _, _ = randomized_svd(projected,
                                 n_components=n_components,
                                 n_iter=self.n_iter,
                                 random_state=np.random.randint(10e5))
        P[key] = U
        D[key] = np.dot(U.T, C).T
    return P, D
示例8: test_randomized_svd_infinite_rank
def test_randomized_svd_infinite_rank():
    """Check extmath.randomized_svd on a matrix with slowly decaying spectrum.

    The matrix is generated with ``tail_strength=1.0``, i.e. without a
    distinct low-rank component.
    """
    n_samples, n_features = 100, 500
    rank, k = 5, 10

    # no 'low_rank component': just regularly but slowly decreasing singular
    # values, so the rank of the data matrix is infinite
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=1.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # reference singular values from the slow exact method
    exact_s = linalg.svd(X, full_matrices=False)[1]

    # fast approximate method without the iterated power method:
    # the approximation does not tolerate the noise
    plain_s = randomized_svd(X, k, n_iter=0)[1]
    assert_greater(np.abs(exact_s[:k] - plain_s).max(), 0.1)

    # fast approximate method with the iterated power method: it still
    # manages to get most of the structure at the requested rank
    powered_s = randomized_svd(X, k, n_iter=5)[1]
    assert_almost_equal(exact_s[:k], powered_s, decimal=3)
示例9: get_bond_order
def get_bond_order(bofile, job_info, num_sv=4):
    """Build bond-order features from a bond-order output file.

    Only the lines after the last line containing "bond order list" are
    parsed. Each is read as ``row_idx col_idx value`` and written
    symmetrically into an ``natoms x natoms`` matrix. Blank lines are skipped.

    Parameters
    ----------
    bofile : str
        Path of the text file to read.
    job_info : dict
        Must provide 'metal_ind', 'natoms' and 'catoms' (a list); it is also
        forwarded to ``symmetricalize_dict``.
    num_sv : int
        Number of singular values requested from ``randomized_svd``.

    Returns
    -------
    The result of ``symmetricalize_dict`` applied to an OrderedDict holding:
    ``bo_sv<i>`` (singular values of the matrix), ``bo_offsv<i>`` (singular
    values with the diagonal zeroed) and ``bo_<catom>`` (matrix entry between
    the metal index and each index in ``[metal_ind] + catoms``).
    """
    metal_ind = job_info['metal_ind']
    natoms = job_info['natoms']
    catoms = [metal_ind] + job_info['catoms']
    dict_bondorder = OrderedDict()

    # Keep only the lines following the last "bond order list" header.
    botext = []
    with open(bofile, 'r') as fo:
        for line in fo:
            if "bond order list" in line:
                botext = []
            else:
                botext.append(line)

    bo_mat = np.zeros(shape=(natoms, natoms))
    for line in botext:
        ll = line.split()
        if not ll:
            # Blank (e.g. trailing) lines carry no data; indexing ll[0]
            # on them would raise IndexError.
            continue
        row_idx, col_idx = int(ll[0]), int(ll[1])
        value = float(ll[2])
        bo_mat[row_idx, col_idx] = value
        bo_mat[col_idx, row_idx] = value

    # Off-diagonal variant: same matrix with the diagonal zeroed.
    bo_mat_off_diag = bo_mat.copy()
    np.fill_diagonal(bo_mat_off_diag, 0)

    # Leading singular values of the full and the off-diagonal matrix.
    for prefix, matrix in (('bo_sv', bo_mat), ('bo_offsv', bo_mat_off_diag)):
        _, singular_values, _ = randomized_svd(matrix, n_components=num_sv, n_iter=20)
        sigma = singular_values.tolist()
        for sv in range(num_sv):
            dict_bondorder['%s%d' % (prefix, sv)] = sigma[sv]

    # Matrix entry between the metal index and each listed index.
    for catom in catoms:
        dict_bondorder['bo_%d' % catom] = bo_mat[metal_ind, catom]

    dict_bondorder = symmetricalize_dict(job_info, feature_dict=dict_bondorder)
    return dict_bondorder
示例10: test_randomized_svd_sign_flip
def test_randomized_svd_sign_flip():
    """Check that flip_sign=True gives the same u and v for every seed.

    Each decomposition must also reconstruct the input and have orthonormal
    factors.
    """
    a = np.array([[2.0, 0.0], [0.0, 1.0]])
    identity = np.eye(2)
    ref_u, _, ref_v = randomized_svd(a, 2, flip_sign=True, random_state=41)
    for seed in range(10):
        u, s, v = randomized_svd(a, 2, flip_sign=True, random_state=seed)
        # Same factors as the reference seed.
        assert_almost_equal(ref_u, u)
        assert_almost_equal(ref_v, v)
        # Exact reconstruction and orthonormality.
        assert_almost_equal(np.dot(u * s, v), a)
        assert_almost_equal(np.dot(u.T, u), identity)
        assert_almost_equal(np.dot(v.T, v), identity)
示例11: _sv_thresh
def _sv_thresh(X, threshold, num_svalue):
    """
    Perform singular value thresholding.

    Parameters
    ----------
    X : array of shape [n_samples, n_features]
        The input array.
    threshold : float
        The threshold for the singular values.
    num_svalue : int
        The number of singular values to compute.

    Returns
    -------
    X_thresh : array of shape [n_samples, n_features]
        The output after performing singular value thresholding.
    greater_sv : int
        The number of computed singular values of `X` which were greater
        than `threshold`.
    (U, s, V) : tuple
        The truncated decomposition from ``randomized_svd``. ``s`` holds the
        singular values after ``_soft_thresh`` has been applied, not the raw
        ones.
    """
    U, s, V = randomized_svd(X, num_svalue)
    # Count before thresholding: afterwards the raw values are gone.
    greater_sv = np.count_nonzero(s > threshold)
    s = _soft_thresh(s, threshold)
    X_thresh = np.dot(U, np.dot(np.diag(s), V))
    return X_thresh, greater_sv, (U, s, V)
示例12: _fit
def _fit(self, gn):
    """Scale ``gn``, run a randomized SVD on its transpose and store results.

    Sets ``explained_variance_``, ``explained_variance_ratio_`` and
    ``components_`` on ``self`` and returns the ``(u, s, v)`` triple from
    ``randomized_svd``.
    """
    from sklearn.utils.validation import check_random_state
    from sklearn.utils.extmath import randomized_svd

    # apply scaling (the scaler is fitted on the same data it transforms)
    scaled = self.scaler_.fit(gn).transform(gn)

    # transpose for svd
    # TODO eliminate need for transposition
    x = scaled.T
    n_samples, _ = x.shape

    # singular value decomposition
    rng = check_random_state(self.random_state)
    u, s, v = randomized_svd(x, self.n_components,
                             n_iter=self.iterated_power,
                             random_state=rng)

    # explained variance: squared singular values over the row count of x,
    # and their ratio to the summed per-column variance of x
    exp_var = (s ** 2) / n_samples
    self.explained_variance_ = exp_var
    total_var = np.var(x, axis=0).sum()
    self.explained_variance_ratio_ = exp_var / total_var

    # store components
    self.components_ = v

    return u, s, v
示例13: _max_singular_value
def _max_singular_value(self, X_filled):
    """Return the leading singular value of ``X_filled``.

    It is obtained from a quick rank-1 ``randomized_svd`` with 5 iterations.
    """
    singular_values = randomized_svd(X_filled, 1, n_iter=5)[1]
    return singular_values[0]
示例14: _svd_step
def _svd_step(self, X, shrinkage_value, max_rank=None):
    """
    Reconstruct X from a soft-thresholded SVD.

    Every singular value is reduced by ``shrinkage_value`` and clipped at
    zero; only the components that remain positive are used.

    Returns the reconstructed matrix and the rank achieved (the number of
    singular values still positive after shrinkage).
    """
    if not max_rank:
        # no rank limit: exact dense SVD via np.linalg.svd
        U, s, V = np.linalg.svd(X, full_matrices=False, compute_uv=True)
    else:
        # a rank limit is given: use the faster randomized SVD
        U, s, V = randomized_svd(X, max_rank, n_iter=self.n_power_iterations)

    shrunk = np.maximum(s - shrinkage_value, 0)
    rank = (shrunk > 0).sum()

    # Truncate all three factors to the surviving components.
    S_kept = np.diag(shrunk[:rank])
    U_kept = U[:, :rank]
    V_kept = V[:rank, :]
    X_reconstruction = np.dot(U_kept, np.dot(S_kept, V_kept))
    return X_reconstruction, rank
示例15: run
def run():
    """Rank works for the user named 'jj' from a truncated SVD of all ratings.

    Steps, each followed by a printed elapsed-time checkpoint:

    1. Load every rating and collect the ids and titles of rated works.
    2. Collect the titles the target user rated with anything but 'willsee'.
    3. Build a sparse user x work matrix: rows are indexed by user id,
       columns by the position of the work id in ``anime_ids``.
    4. Reconstruct it from a rank-``NB_COMPONENTS`` randomized SVD and sort
       the target user's row by predicted score, highest first.
    5. Print the ranking until ten not-yet-seen titles have been shown, then
       dump the recorded database queries and the total running time.
    """
    start = datetime.now()

    KING_ID = User.objects.get(username='jj').id
    anime_titles = {}
    anime_ids = set()
    rs = list(Rating.objects.all().select_related('work'))
    print(rs[0])
    cp0 = datetime.now()
    print(cp0 - start)

    for i, rating in enumerate(rs, start=1):
        if i % 1000 == 0:
            print(i)
        if rating.work.id not in anime_ids:
            anime_ids.add(rating.work.id)
            anime_titles[rating.work.id] = rating.work.title
    cp1 = datetime.now()
    print(cp1 - cp0)

    seen_titles = set()
    for rating in Rating.objects.filter(user__id=KING_ID).select_related('work'):
        if rating.choice != 'willsee':
            seen_titles.add(rating.work.title)
    cp2 = datetime.now()
    print(cp2 - cp1)

    nb_users = max(user.id for user in User.objects.all())
    nb_anime = len(anime_ids)
    anime_ids = list(anime_ids)
    # work id -> column index in X
    inversed = {work_id: col for col, work_id in enumerate(anime_ids)}
    print("Computing X: (%i×%i)" % (nb_users, nb_anime))
    cp3 = datetime.now()
    print(cp3 - cp2)

    print(nb_users, '×', nb_anime)
    values = {'like': 2, 'dislike': -2, 'neutral': 0.1, 'willsee': 0.5, 'wontsee': -0.5}
    # The extra row lets user ids (up to nb_users) be used directly as row
    # indices. The extra column is unused padding kept from the original
    # layout.
    X = lil_matrix((nb_users + 1, nb_anime + 1))
    for rating in Rating.objects.select_related('work', 'user'):
        # Columns are positions in anime_ids, not work ids, so membership in
        # `inversed` (not `work.id < nb_anime`) decides whether a rating can
        # be placed.
        col = inversed.get(rating.work.id)
        if col is not None:
            X[rating.user.id, col] = values[rating.choice]

    # Ranking computation
    cp4 = datetime.now()
    print(cp4 - cp3)
    U, sigma, VT = randomized_svd(X, NB_COMPONENTS, n_iter=3, random_state=42)
    XD = np.dot(np.dot(U, np.diag(sigma)), VT)

    # Column `col` of XD belongs to work id anime_ids[col]. Iterate over
    # exactly those columns: this includes column 0, excludes the padding
    # column, and cannot index anime_ids out of range.
    ranking = sorted(
        ((XD[KING_ID, col], anime_titles[work_id])
         for col, work_id in enumerate(anime_ids)),
        reverse=True,
    )

    # Summarize the results of the ranking for KING_ID:
    # “=> rank, title, score”
    c = 0
    for i, (rating, title) in enumerate(ranking, start=1):
        if title not in seen_titles:
            print('=>', i, title, rating)
            c += 1
        elif i < 10:
            print(i, title, rating)
        if c >= 10:
            break

    print(len(connection.queries))
    for line in connection.queries:
        print(line)

    end = datetime.now()
    print(end - start)