This article collects typical usage examples of the Python class sklearn.decomposition.NMF. If you are wondering what decomposition.NMF does, how to use it, or where to find examples of it in practice, the curated code samples below may help. You can also explore the containing module, sklearn.decomposition, further.
Fifteen code examples of decomposition.NMF are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
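Before the examples, here is a minimal sketch of the core NMF API that all of them build on: fit_transform returns the sample-by-component matrix W, and components_ holds the component-by-feature matrix H, so that W @ H approximates the input (toy random data, illustrative only):

import numpy as np
from sklearn.decomposition import NMF

# Toy non-negative matrix: 6 samples x 5 features.
X = np.abs(np.random.RandomState(0).rand(6, 5))

nmf = NMF(n_components=2, init='nndsvd', random_state=0)
W = nmf.fit_transform(X)   # shape (6, 2): per-sample component weights
H = nmf.components_        # shape (2, 5): per-component feature weights
reconstruction = W @ H     # approximates X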
Example 1: write_topics
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
import codecs
import pickle

from sklearn import decomposition
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# count_vect is assumed to be a module-level CountVectorizer in the original source.
count_vect = CountVectorizer()

def write_topics(ftopics, fwords, ftopics_words, poem_words, n_topic, n_topic_words):
    count_matrix = count_vect.fit_transform(poem_words)
    tfidf = TfidfTransformer().fit_transform(count_matrix)
    nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
    # Newer scikit-learn spells this get_feature_names_out().
    feature_names = count_vect.get_feature_names()
    # Write the top n_topic_words terms of each topic, highest weight first.
    fw = codecs.open(ftopics, 'w', 'utf-8')
    for topic in nmf.components_:
        fw.write(' '.join([feature_names[i] for i in topic.argsort()[:-n_topic_words - 1:-1]]) + '\n')
    fw.close()
    print('Write topics done.')
    fw = codecs.open(fwords, 'wb')
    pickle.dump(feature_names, fw)
    fw.close()
    print('Write words done.')
    fw = codecs.open(ftopics_words, 'wb')
    pickle.dump(nmf.components_, fw)
    fw.close()
    print('Write topic_words done.')
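A hypothetical call, with made-up file names and a two-line corpus:

poem_words = ['white sun sets behind the mountains',
              'the yellow river flows into the sea']
write_topics('topics.txt', 'words.pkl', 'topic_words.pkl',
             poem_words, n_topic=2, n_topic_words=3)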
Example 2: _fit_and_score_NMF
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
import numpy as np
from scipy import sparse
from sklearn.decomposition import NMF

def _fit_and_score_NMF(self, new_residuals):
    """
    Factorize a residual matrix, returning the residual scores and an embedding.
    Arg types:
        * **new_residuals** *(COO Scipy matrix)* - The residual matrix.
    Return types:
        * **scores** *(CSR Scipy matrix)* - The residual scores.
        * **W** *(Numpy array)* - The embedding matrix.
    """
    # Note: alpha was a valid NMF parameter in older scikit-learn;
    # current versions split it into alpha_W / alpha_H.
    model = NMF(n_components=self.dimensions,
                init="random",
                verbose=False,
                alpha=self.alpha)
    W = model.fit_transform(new_residuals)
    H = model.components_
    # Reconstruction restricted to the stored (index_1, index_2) entries.
    sub_scores = np.sum(np.multiply(W[self._index_1, :], H[:, self._index_2].T), axis=1)
    # Keep only the part of each residual not yet explained by W·H.
    scores = np.maximum(self._residuals.data - sub_scores, 0)
    scores = sparse.csr_matrix((scores, (self._index_1, self._index_2)),
                               shape=self._shape,
                               dtype=np.float32)
    return scores, W
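A standalone sketch of the same scoring step, outside the class (hypothetical random data; residual_scores keeps, for every stored entry, the part not yet explained by the factorization):

import numpy as np
from scipy import sparse
from sklearn.decomposition import NMF

R = sparse.random(20, 20, density=0.2, format='csr', random_state=0)
index_1, index_2 = R.nonzero()

model = NMF(n_components=4, init='random', random_state=0)
W = model.fit_transform(R)
H = model.components_

# Evaluate the reconstruction only at the observed entries.
sub_scores = np.sum(W[index_1, :] * H[:, index_2].T, axis=1)
residual_scores = np.maximum(R.data - sub_scores, 0)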
Example 3: __init__
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def __init__(self, n_topics=50, estimator='LDA'):
    """
    n_topics is the desired number of topics.
    To use Latent Semantic Analysis, set estimator to 'LSA';
    to use Non-Negative Matrix Factorization, set estimator to 'NMF';
    otherwise, this defaults to Latent Dirichlet Allocation ('LDA').
    """
    self.n_topics = n_topics
    if estimator == 'LSA':
        self.estimator = TruncatedSVD(n_components=self.n_topics)
    elif estimator == 'NMF':
        self.estimator = NMF(n_components=self.n_topics)
    else:
        # Current scikit-learn spells this parameter n_components;
        # the original code used the long-removed n_topics.
        self.estimator = LatentDirichletAllocation(n_components=self.n_topics)
    self.model = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', CountVectorizer(tokenizer=identity,
                                  preprocessor=None, lowercase=False)),
        ('model', self.estimator)
    ])
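For reference, the three estimators this constructor dispatches between can also be built directly (note the n_components spelling for LDA in current scikit-learn):

from sklearn.decomposition import NMF, TruncatedSVD, LatentDirichletAllocation

estimators = {
    'LSA': TruncatedSVD(n_components=50),
    'NMF': NMF(n_components=50),
    'LDA': LatentDirichletAllocation(n_components=50),
}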
Example 4: fit_and_score_NMF
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def fit_and_score_NMF(self, new_residuals):
    """
    Factorize a residual matrix, returning the residual scores and an embedding.
    :param new_residuals: Input target matrix.
    :return scores: Residual score matrix.
    :return W: Embedding matrix.
    """
    model = NMF(n_components=self.args.dimensions,
                init="random",
                verbose=False,
                alpha=self.args.alpha)
    W = model.fit_transform(new_residuals)
    H = model.components_
    print("Scoring started.\n")
    # Same scoring step as in Example 2: reconstruct only the stored entries.
    sub_scores = np.sum(np.multiply(W[self.index_1, :], H[:, self.index_2].T), axis=1)
    scores = np.maximum(self.residuals.data - sub_scores, 0)
    scores = sparse.csr_matrix((scores, (self.index_1, self.index_2)),
                               shape=self.shape,
                               dtype=np.float32)
    return scores, W
Example 5: factorize_nmf
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
from scipy.io import mmread

from sklearn import decomposition

def factorize_nmf():
    print('factorizing matrix')
    newsgroups_mmf_file = '/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'
    document_term_matrix = mmread(newsgroups_mmf_file)
    # Constants holds project-level configuration in the original repository.
    factorizer = decomposition.NMF(
        init="nndsvd", n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    document_topic_matrix = \
        factorizer.fit_transform(document_term_matrix)
    topic_term_matrix = factorizer.components_
    # mmwrite(mmf_file, small_matrix)
    # mmwrite(newsgroups_mmf_file, X)
Example 6: train_nmf
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    # Optionally clean each document in place before vectorizing.
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(
        max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    nmf = NMF(
        n_components=n_topics,
        random_state=1,
        alpha=.1,
        l1_ratio=.5,
        init='nndsvd').fit(tfidf)
    return TOPIC(tfidf_features, nmf)
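A hypothetical call on a tiny in-memory corpus, assuming an older scikit-learn where NMF still accepts alpha (cleaning=None skips the project-specific clearstring helper; min_df=1 because the corpus is so small):

corpus = ['the cat sat on the mat',
          'dogs and cats living together',
          'the dog chased the neighbour cat']
topic = train_nmf(corpus, n_topics=2, min_df=1, cleaning=None)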
Example 7: nmf_to_onnx
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def nmf_to_onnx(W, H, op_version=12):
    """
    Converts an NMF described by matrices *W*, *H*
    (*WH* approximates the training data *M*)
    into an ONNX function which takes two indices *(i, j)*
    and returns the prediction for them. It assumes
    these indices apply to the training data.
    """
    col = OnnxArrayFeatureExtractor(H, 'col')
    row = OnnxArrayFeatureExtractor(W.T, 'row')
    dot = OnnxMul(col, row, op_version=op_version)
    res = OnnxReduceSum(dot, output_names="rec", op_version=op_version)
    indices_type = np.array([0], dtype=np.int64)
    onx = res.to_onnx(inputs={'col': indices_type,
                              'row': indices_type},
                      outputs=[('rec', FloatTensorType((None, 1)))],
                      target_opset=op_version)
    return onx
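A hedged usage sketch with onnxruntime, assuming W and H came from a fitted NMF and were cast to float32 (this snippet appears to come from the sklearn-onnx example gallery, which runs the exported graph the same way):

import numpy as np
from onnxruntime import InferenceSession

onx = nmf_to_onnx(W.astype(np.float32), H.astype(np.float32))
sess = InferenceSession(onx.SerializeToString())

# Predicts M[i, j] ~= W[i, :] @ H[:, j] for the given index pair.
pred = sess.run(None, {'row': np.array([0], dtype=np.int64),
                       'col': np.array([1], dtype=np.int64)})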
Example 8: get_nmf_decomposition
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def get_nmf_decomposition(
        X: np.ndarray,
        n_roles: int,
) -> FactorTuple:
    """
    Compute an NMF decomposition.
    :param X: matrix to factor
    :param n_roles: rank of the decomposition
    """
    nmf = NMF(n_components=n_roles, solver='mu', init='nndsvda')
    with warnings.catch_warnings():
        # Ignore the convergence warning from NMF, since
        # non-convergence will show up as a large cost anyway.
        warnings.simplefilter('ignore')
        G = nmf.fit_transform(X)
    F = nmf.components_
    return G, F
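A quick illustrative call (hypothetical random data; G is the node-by-role matrix, F the role-by-feature matrix):

import numpy as np

X = np.abs(np.random.RandomState(0).rand(10, 6))
G, F = get_nmf_decomposition(X, n_roles=3)
print(G.shape, F.shape)  # (10, 3) (3, 6)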
Example 9: test_objectmapper
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.decomposition.PCA, decomposition.PCA)
    self.assertIs(df.decomposition.IncrementalPCA,
                  decomposition.IncrementalPCA)
    self.assertIs(df.decomposition.KernelPCA, decomposition.KernelPCA)
    self.assertIs(df.decomposition.FactorAnalysis,
                  decomposition.FactorAnalysis)
    self.assertIs(df.decomposition.FastICA, decomposition.FastICA)
    self.assertIs(df.decomposition.TruncatedSVD, decomposition.TruncatedSVD)
    self.assertIs(df.decomposition.NMF, decomposition.NMF)
    self.assertIs(df.decomposition.SparsePCA, decomposition.SparsePCA)
    self.assertIs(df.decomposition.MiniBatchSparsePCA,
                  decomposition.MiniBatchSparsePCA)
    self.assertIs(df.decomposition.SparseCoder, decomposition.SparseCoder)
    self.assertIs(df.decomposition.DictionaryLearning,
                  decomposition.DictionaryLearning)
    self.assertIs(df.decomposition.MiniBatchDictionaryLearning,
                  decomposition.MiniBatchDictionaryLearning)
    self.assertIs(df.decomposition.LatentDirichletAllocation,
                  decomposition.LatentDirichletAllocation)
Example 10: factorize_string_matrix
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def factorize_string_matrix(self):
    """
    Create string labels by factorizing the node-feature matrix.
    """
    rows = [node for node, features in self.binned_features.items() for feature in features]
    columns = [int(feature) for node, features in self.binned_features.items() for feature in features]
    scores = [1 for i in range(len(columns))]
    row_number = max(rows) + 1
    column_number = max(columns) + 1
    features = csr_matrix((scores, (rows, columns)), shape=(row_number, column_number))
    model = NMF(n_components=self.args.factors, init="random",
                random_state=self.args.seed, alpha=self.args.beta)
    factors = model.fit_transform(features)
    # Cluster the NMF factors and use the cluster ids as string labels.
    kmeans = KMeans(n_clusters=self.args.clusters, random_state=self.args.seed).fit(factors)
    labels = kmeans.labels_
    features = {str(node): str(labels[node]) for node in self.graph.nodes()}
    return features
Example 11: apply
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def apply(self, X, k=2):
    """
    Apply NMF to the specified document-term matrix X.
    """
    import nimfa
    self.W = None
    self.H = None
    initialize_only = self.max_iters < 1
    if self.update == "euclidean":
        objective = "fro"
    else:
        objective = "div"
    lsnmf = nimfa.Lsnmf(X, max_iter=self.max_iters, rank=k,
                        seed=self.init_strategy, update=self.update,
                        objective=objective, test_conv=self.test_conv)
    res = lsnmf()
    try:
        # Factors may come back as sparse matrices; densify if so.
        self.W = res.basis().todense()
        self.H = res.coef().todense()
    except AttributeError:
        self.W = res.basis()
        self.H = res.coef()
    # Record the number of iterations actually performed.
    self.n_iter = res.n_iter
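A hedged standalone sketch of the same nimfa call without the class plumbing (hypothetical random data, default solver options):

import numpy as np
import nimfa

X = np.abs(np.random.RandomState(0).rand(20, 10))
lsnmf = nimfa.Lsnmf(X, seed='random_vcol', rank=3, max_iter=50)
res = lsnmf()
W, H = res.basis(), res.coef()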
Example 12: get_topics_from_model
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
import warnings

def get_topics_from_model(
        self,
        pipe=Pipeline([
            ('tfidf', TfidfTransformer(sublinear_tf=True)),
            ('nmf', NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0))]),
        num_terms_per_topic=10):
    '''
    Parameters
    ----------
    pipe : Pipeline
        For example, `Pipeline([
        ('tfidf', TfidfTransformer(sublinear_tf=True)),
        ('nmf', NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0))])`.
        The last transformer must populate a `components_` attribute when finished.
    num_terms_per_topic : int

    Returns
    -------
    dict: {term: [term1, ...], ...}
    '''
    pipe.fit_transform(self.sentX)
    topic_model = {}
    for topic_idx, topic in enumerate(pipe._final_estimator.components_):
        # Top terms for this topic, restricted to strictly positive weights.
        term_list = [self.termidxstore.getval(i)
                     for i
                     in topic.argsort()[:-num_terms_per_topic - 1:-1]
                     if topic[i] > 0]
        if len(term_list) > 0:
            topic_model['%s. %s' % (topic_idx, term_list[0])] = term_list
        else:
            # The original called Warning(...) without raising it; warn properly.
            warnings.warn("Topic %s has no terms with scores > 0. Omitting." % topic_idx)
    return topic_model
Example 13: skNMF
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
from sklearn.decomposition import NMF

def skNMF(data, dim):
    model = NMF(n_components=dim)
    model.fit(data)
    return model.transform(data)
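An illustrative call (hypothetical random data):

import numpy as np

data = np.abs(np.random.RandomState(42).rand(8, 5))
embedding = skNMF(data, dim=2)
print(embedding.shape)  # (8, 2)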
Example 14: _sklearn_pretrain
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def _sklearn_pretrain(self, i):
    """
    Pre-train a single layer of the model with sklearn.
    Arg types:
        * **i** *(int)* - The layer index.
    """
    nmf_model = NMF(n_components=self.layers[i],
                    init="random",
                    random_state=self.seed,
                    max_iter=self.pre_iterations)
    U = nmf_model.fit_transform(self._Z)
    V = nmf_model.components_
    return U, V
Example 15: _pre_training
# Required module import: from sklearn import decomposition [as alias]
# Or: from sklearn.decomposition import NMF [as alias]
def _pre_training(self):
    """
    Pre-train each NMF layer in turn.
    """
    self._U_s = []
    self._V_s = []
    for i in range(self._p):
        self._setup_z(i)
        U, V = self._sklearn_pretrain(i)
        self._U_s.append(U)
        self._V_s.append(V)
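The layer-wise idea behind examples 14 and 15 can be sketched standalone, assuming _setup_z feeds each layer the previous layer's component matrix V, as in deep autoencoder-like NMF (hypothetical layer widths and random data):

import numpy as np
from sklearn.decomposition import NMF

Z = np.abs(np.random.RandomState(0).rand(50, 40))
U_s, V_s = [], []
for width in [16, 8]:
    model = NMF(n_components=width, init='random', random_state=0, max_iter=200)
    U_s.append(model.fit_transform(Z))
    V_s.append(model.components_)
    Z = V_s[-1]  # the next layer factorizes this layer's components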