本文整理汇总了Python中sklearn.decomposition.TruncatedSVD类的典型用法代码示例。如果您正苦于以下问题:Python decomposition.TruncatedSVD的具体用法?Python decomposition.TruncatedSVD怎么用?Python decomposition.TruncatedSVD使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.decomposition的用法示例。
在下文中一共展示了decomposition.TruncatedSVD方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: optimize
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def optimize(self):
    """
    Learn one embedding per step and collect them in ``self.embeddings``.

    For each of the ``self.args.order`` steps a target matrix is requested
    from ``self._create_target_matrix()``, factorised with ``TruncatedSVD``
    and its projection is appended to ``self.embeddings``.
    """
    print("\nOptimization started.\n")
    self.embeddings = []
    for _ in tqdm(range(self.args.order)):
        matrix = self._create_target_matrix()
        reducer = TruncatedSVD(
            n_components=self.args.dimensions,
            n_iter=self.args.iterations,
            random_state=self.args.seed,
        )
        # fit() hands back the estimator, so the projection can be chained.
        self.embeddings.append(reducer.fit(matrix).transform(matrix))
示例2: test_resolve_embeddings
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def test_resolve_embeddings(self):
    """
    Embeddings may be supplied only once per resolver, and projecting them
    yields a ``ParsedCorpus`` plus the expected axes frame.
    """
    already_set_pattern = "You have already set embeddings by running set_embeddings or set_embeddings_model."
    tdm = self.corpus.get_unigram_corpus().select(ClassPercentageCompactor(term_count=1))

    # Case 1: a raw matrix was set first, so setting a model afterwards must fail.
    resolver = EmbeddingsResolver(tdm)
    resolver = resolver.set_embeddings(tdm.get_term_doc_mat())
    if self.assertRaisesRegex:
        with self.assertRaisesRegex(Exception, already_set_pattern):
            resolver.set_embeddings_model(None)

    # Case 2: a model was set first, so setting a raw matrix afterwards must fail.
    resolver = EmbeddingsResolver(tdm)
    resolver = resolver.set_embeddings_model(MockWord2Vec(tdm.get_terms()))
    if self.assertRaisesRegex:
        with self.assertRaisesRegex(Exception, already_set_pattern):
            resolver.set_embeddings(tdm.get_term_doc_mat())

    # Projection through a 3-component truncated SVD.
    projected_corpus, axes = resolver.project_embeddings(projection_model=TruncatedSVD(3))
    self.assertIsInstance(projected_corpus, ParsedCorpus)
    expected_axes = pd.DataFrame(index=['speak'], data={'x': [0.0], 'y': [0.0]})
    self.assertEqual(axes.to_dict(), expected_axes.to_dict())
示例3: test_selective_tsvd
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def test_selective_tsvd():
    """
    SelectiveTruncatedSVD must decompose only the requested columns.

    The first two columns of the module-level frame ``X`` are reduced to a
    single component; the petal columns must pass through unchanged.
    """
    original = X
    cols = [original.columns[0], original.columns[1]]  # Only perform on first two columns...
    untouched_names = ['petal length (cm)', 'petal width (cm)']

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` is the
    # equivalent accessor available on both old and new pandas versions.
    compare_cols = np.array(original[untouched_names].values)  # should be the same as the trans cols

    transformer = SelectiveTruncatedSVD(cols=cols, n_components=1).fit(original)
    transformed = transformer.transform(original)

    untouched_cols = np.array(transformed[untouched_names].values)
    assert_array_almost_equal(compare_cols, untouched_cols)

    # One new concept column plus the two untouched columns.
    assert 'Concept1' in transformed.columns
    assert transformed.shape[1] == 3
    assert isinstance(transformer.get_decomposition(), TruncatedSVD)
    assert SelectiveTruncatedSVD().get_decomposition() is None  # default None

    # test the selective mixin
    assert isinstance(transformer.cols, list)
示例4: transform
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def transform(self):
    """
    Build SVD-reduced TF-IDF features from co-occurring n-grams.

    Each observation/target document is split on single spaces and turned
    into n-grams, the paired n-gram lists are combined into co-occurrence
    terms, those are TF-IDF vectorised and finally reduced with
    ``TruncatedSVD``. Returns the reduced matrix.
    """
    # n-grams of every observation and target document
    obs_ngrams = [
        ngram_utils._ngrams(doc.split(" "), self.obs_ngram, "_")
        for doc in self.obs_corpus
    ]
    target_ngrams = [
        ngram_utils._ngrams(doc.split(" "), self.target_ngram, "_")
        for doc in self.target_corpus
    ]

    # co-occurrence terms, paired position by position
    cooc_terms = [
        self._get_cooc_terms(obs, target, "X")
        for obs, target in zip(obs_ngrams, target_ngrams)
    ]

    # tf-idf over the co-occurrence terms
    vectorizer = self._init_word_ngram_tfidf(ngram=1)
    tfidf_matrix = vectorizer.fit_transform(cooc_terms)

    # dimensionality reduction
    reducer = TruncatedSVD(
        n_components=self.svd_dim,
        n_iter=self.svd_n_iter,
        random_state=config.RANDOM_SEED,
    )
    return reducer.fit_transform(tfidf_matrix)
# 2nd in CrowdFlower (preprocessing_mikhail.py)
示例5: _reduce_dimensions
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def _reduce_dimensions(self, X):
    """
    Project the feature matrix onto fewer dimensions with Truncated SVD.

    Arg types:
        * **X** *(Scipy COO or Numpy array)* - The wide feature matrix.

    Return types:
        * **X** *(Numpy array)* - The reduced feature matrix of nodes.
    """
    reducer = TruncatedSVD(
        n_components=self.reduction_dimensions,
        n_iter=self.svd_iterations,
        random_state=self.seed,
    )
    # fit() returns the fitted estimator, so transform() can be chained.
    return reducer.fit(X).transform(X)
示例6: _create_reduced_features
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def _create_reduced_features(self, X):
    """
    Create a dense, reduced node feature matrix and return it transposed.

    Arg types:
        * **X** *(Scipy COO or Numpy array)* - The wide feature matrix.

    Return types:
        * **T** *(Numpy array)* - The transpose of the Truncated SVD
          projection of ``X``.
    """
    reducer = TruncatedSVD(
        n_components=self.reduction_dimensions,
        n_iter=self.svd_iterations,
        random_state=self.seed,
    )
    projected = reducer.fit(X).transform(X)
    return projected.T
示例7: fit_truncatedSVD
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def fit_truncatedSVD(data):
    '''
    Reduce ``data`` with truncated SVD, then fit and report a classifier.

    The reduction, data preparation and fit/predict/print steps are
    delegated to ``reduceDimensions``, ``prepare_data`` and
    ``class_fit_predict_print`` respectively.
    '''
    # keyword parameters for the truncated SVD estimator
    svd_params = dict(
        algorithm='randomized',
        n_components=5,
        n_iter=5,
        random_state=42,
        tol=0.0,
    )

    # reduce the data
    reduced = reduceDimensions(cd.TruncatedSVD, data, **svd_params)

    # prepare the data for the classifier
    prepared = prepare_data(data, reduced, svd_params['n_components'])

    # fit the model
    class_fit_predict_print(prepared)
# the file name of the dataset
示例8: test_random_hasher
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def test_random_hasher():
    """
    Random forest hashing on the circles dataset.

    The hashed representation must stay linearly separable even after it is
    projected onto two SVD dimensions.
    Note: Not all random_states produce perfect results.
    """
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    points, labels = datasets.make_circles(factor=0.5)
    hashed = hasher.fit_transform(points)

    # fit followed by transform must agree with fit_transform
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    refit_hashed = hasher.fit(points).transform(points)
    assert_array_equal(refit_hashed.toarray(), hashed.toarray())

    # one leaf active per data point per forest
    assert_equal(hashed.shape[0], points.shape[0])
    assert_array_equal(hashed.sum(axis=1), hasher.n_estimators)

    # a linear classifier must separate the 2-D SVD projection perfectly
    reduced = TruncatedSVD(n_components=2).fit_transform(hashed)
    classifier = LinearSVC()
    classifier.fit(reduced, labels)
    assert_equal(classifier.score(reduced, labels), 1.)
示例9: test_truncated_svd_eq_pca
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def test_truncated_svd_eq_pca():
    """TruncatedSVD should be equal to PCA on centered data."""
    centered = X - X.mean(axis=0)
    shared = {'n_components': 10, 'random_state': 42}

    svd = TruncatedSVD(algorithm='arpack', **shared)
    pca = PCA(svd_solver='arpack', **shared)

    svd_projection = svd.fit_transform(centered)
    pca_projection = pca.fit_transform(centered)

    assert_allclose(svd_projection, pca_projection, rtol=1e-9)
    # The data was centered beforehand, so PCA's estimated mean is ~0.
    assert_allclose(pca.mean_, 0, atol=1e-9)
    assert_allclose(svd.components_, pca.components_)
示例10: dim_reduction_method
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def dim_reduction_method(self):
    """
    Select the dimensionality reduction estimator named by ``self.dim_reduction``.

    Returns:
        A new, default-configured estimator instance for the configured
        method, or ``None`` when ``self.dim_reduction`` is ``None``.

    Raises:
        ValueError: if ``self.dim_reduction`` is not one of the supported
            method names.
    """
    # No reduction requested.
    if self.dim_reduction is None:
        return None

    if self.dim_reduction == 'pca':
        return PCA()
    elif self.dim_reduction == 'factor-analysis':
        return FactorAnalysis()
    elif self.dim_reduction == 'fast-ica':
        return FastICA()
    elif self.dim_reduction == 'kernel-pca':
        return KernelPCA()
    elif self.dim_reduction == 'sparse-pca':
        return SparsePCA()
    elif self.dim_reduction == 'truncated-svd':
        return TruncatedSVD()

    # The message is built from adjacent literals (no backslash continuation)
    # so that no source indentation leaks into it, and "fast-ica" is now
    # correctly quoted.
    raise ValueError(
        '%s is not a supported dimensionality reduction method. Valid inputs are: '
        '"pca","factor-analysis","fast-ica","kernel-pca","sparse-pca","truncated-svd".'
        % (self.dim_reduction,)
    )
示例11: plot_z_run
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def plot_z_run(z_run, label):
    """
    Show two stacked scatter plots of ``z_run``, coloured by ``label``.

    The top panel uses the first two components of a 3-component truncated
    SVD projection; the bottom panel uses a t-SNE embedding.
    """
    _fig, panels = plt.subplots(2, 1)
    top, bottom = panels[0], panels[1]
    scatter_style = dict(c=label, marker='*', linewidths=0)

    # Linear projection (truncated SVD, titled as PCA on the plot)
    svd_model = TruncatedSVD(n_components=3).fit(z_run)
    svd_points = svd_model.transform(z_run)
    top.scatter(svd_points[:, 0], svd_points[:, 1], **scatter_style)
    top.set_title('PCA on z_run')

    # Non-linear embedding (t-SNE)
    tsne_model = TSNE(verbose=2, perplexity=80, min_grad_norm=1E-12, n_iter=3000)
    tsne_points = tsne_model.fit_transform(z_run)
    bottom.scatter(tsne_points[:, 0], tsne_points[:, 1], **scatter_style)
    bottom.set_title('tSNE on z_run')

    plt.show()
示例12: __init__
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def __init__(self, k=3, **kwargs):
    """
    Assemble the normalise -> TF-IDF -> (SVD + KNN) pipeline and load any
    previously stored state via ``self.load()``.

    ``k`` is forwarded to ``KNNTransformer``; extra keyword arguments are
    accepted but not used here.
    """
    self.k = k

    normalizer = TextNormalizer(minimum=10, maximum=100)
    tfidf = TfidfVectorizer()
    neighbours = Pipeline([
        ('svd', TruncatedSVD(n_components=100)),
        ('model', KNNTransformer(k=self.k, algorithm='ball_tree')),
    ])
    self.pipeline = Pipeline([
        ('norm', normalizer),
        ('tfidf', tfidf),
        ('knn', neighbours),
    ])

    # Locations of the stored lexicon and vectorizer.
    self.lex_path = "lexicon.pkl"
    self.vect_path = "vect.pkl"

    # Placeholders until load() runs; False marks "no vectorizer yet".
    self.vectorizer = False
    self.lexicon = None
    self.load()
示例13: fit_transform
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def fit_transform(self, documents):
    """
    Fit the recommender on ``documents``, or rebuild the KNN stage from a
    stored vectorizer and lexicon when those are already available.

    Sets ``self.vect`` and ``self.knn``; returns nothing.
    """
    # Vectorizer is False while the pipeline has not been fit yet:
    # run the full pipeline, then persist the vectorizer and lexicon.
    if self.vectorizer == False:
        self.lexicon = self.pipeline.fit_transform(documents)
        steps = self.pipeline.named_steps
        self.vect = steps['tfidf']
        self.knn = steps['knn']
        self.save()
        return

    # Otherwise reuse the stored vectorizer and refit only the SVD + KNN
    # stage on the prefitted lexicon.
    self.vect = self.vectorizer
    svd_stage = ('svd', TruncatedSVD(n_components=100))
    knn_stage = ('knn', KNNTransformer(k=self.k, algorithm='ball_tree'))
    self.knn = Pipeline([svd_stage, knn_stage])
    self.knn.fit_transform(self.lexicon)
示例14: __init__
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def __init__(self, n_topics=50, estimator='LDA'):
    """
    Build a topic-modelling pipeline.

    n_topics is the desired number of topics.
    To use Latent Semantic Analysis, set estimator to 'LSA',
    To use Non-Negative Matrix Factorization, set estimator to 'NMF',
    otherwise, defaults to Latent Dirichlet Allocation ('LDA').

    The chosen estimator is stored in ``self.estimator`` and used as the
    final step of ``self.model``.
    """
    self.n_topics = n_topics

    if estimator == 'LSA':
        self.estimator = TruncatedSVD(n_components=self.n_topics)
    elif estimator == 'NMF':
        self.estimator = NMF(n_components=self.n_topics)
    else:
        # ``n_topics`` was deprecated in scikit-learn 0.19 and removed in
        # 0.21; ``n_components`` is the supported keyword.
        self.estimator = LatentDirichletAllocation(n_components=self.n_topics)

    self.model = Pipeline([
        ('norm', TextNormalizer()),
        # NOTE: the step is named 'tfidf' but holds a CountVectorizer; the
        # name is kept because callers may look it up by key.
        ('tfidf', CountVectorizer(tokenizer=identity,
                                  preprocessor=None, lowercase=False)),
        ('model', self.estimator)
    ])
示例15: create_pipeline
# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import TruncatedSVD [as 别名]
def create_pipeline(estimator, reduction=False):
    """
    Build a text classification pipeline ending in ``estimator``.

    The pipeline normalises the text, TF-IDF vectorises it, optionally
    applies a truncated SVD reduction when ``reduction`` is truthy, and
    finishes with ``estimator`` as the 'classifier' step.
    """
    steps = [('normalize', TextNormalizer())]
    steps.append((
        'vectorize',
        TfidfVectorizer(tokenizer=identity, preprocessor=None, lowercase=False),
    ))

    if reduction:
        steps.append(('reduction', TruncatedSVD(n_components=10000)))

    # The estimator always comes last.
    steps.append(('classifier', estimator))
    return Pipeline(steps)