本文整理汇总了Python中sklearn.decomposition.TruncatedSVD.partial_fit方法的典型用法代码示例。如果您正苦于以下问题:Python TruncatedSVD.partial_fit方法的具体用法?Python TruncatedSVD.partial_fit怎么用?Python TruncatedSVD.partial_fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.decomposition.TruncatedSVD的用法示例。
在下文中一共展示了TruncatedSVD.partial_fit方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
Example 1: TopicModel
# Required import: from sklearn.decomposition import TruncatedSVD
# Or: from sklearn.decomposition.TruncatedSVD import partial_fit
#.........这里部分代码省略.........
random_state=kwargs.get('random_state', 1),
learning_method=kwargs.get('learning_method', 'online'),
learning_offset=kwargs.get('learning_offset', 10.0),
batch_size=kwargs.get('batch_size', 128),
n_jobs=kwargs.get('n_jobs', 1))
elif model == 'lsa':
self.model = TruncatedSVD(
n_components=n_topics,
algorithm=kwargs.get('algorithm', 'randomized'),
n_iter=kwargs.get('n_iter', 5),
random_state=kwargs.get('random_state', 1))
else:
msg = 'model "{}" invalid; must be {}'.format(
model, {'nmf', 'lda', 'lsa'})
raise ValueError(msg)
def __repr__(self):
    """Compact representation: topic count plus the wrapped model's class name."""
    model_name = str(self.model).split('(', 1)[0]
    return 'TopicModel(n_topics={}, model={})'.format(self.n_topics, model_name)
def save(self, filename):
    """Serialize the wrapped sklearn model to ``filename`` via joblib."""
    joblib.dump(self.model, filename, compress=3)
    logger.info('{} model saved to {}'.format(self.model, filename))
@classmethod
def load(cls, filename):
    """Rehydrate a TopicModel from a joblib file written by ``save``."""
    model = joblib.load(filename)
    # older sklearn models expose ``n_topics``; newer ones only ``n_components``
    if hasattr(model, 'n_topics'):
        n_topics = model.n_topics
    else:
        n_topics = model.n_components
    return cls(model, n_topics=n_topics)
def fit(self, doc_term_matrix):
    """Train the wrapped sklearn model on ``doc_term_matrix`` (delegation only)."""
    self.model.fit(doc_term_matrix)
def partial_fit(self, doc_term_matrix):
    """Incrementally train on one batch; only online LDA supports this."""
    if not isinstance(self.model, LatentDirichletAllocation):
        raise TypeError('only LatentDirichletAllocation models have partial_fit')
    self.model.partial_fit(doc_term_matrix)
def transform(self, doc_term_matrix):
    """Project documents into topic space via the wrapped model."""
    doc_topic = self.model.transform(doc_term_matrix)
    return doc_topic
@property
def n_topics(self):
    """Topic count, tolerant of sklearn's two attribute spellings."""
    if hasattr(self.model, 'n_topics'):
        return self.model.n_topics
    return self.model.n_components
def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
"""
Transform a document-term matrix into a document-topic matrix, where rows
correspond to documents and columns to the topics in the topic model.
Args:
doc_term_matrix (array-like or sparse matrix): corpus represented as a
document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
tf-weighting, while NMF and LSA may do better with tfidf-weighting!
normalize (bool): if True, the values in each row are normalized,
i.e. topic weights on each document sum to 1
Returns:
``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
"""
doc_topic_matrix = self.transform(doc_term_matrix)
if normalize is True:
Example 2: TopicModel
# Required import: from sklearn.decomposition import TruncatedSVD
# Or: from sklearn.decomposition.TruncatedSVD import partial_fit
class TopicModel(object):
"""
Args:
model ({'nmf', 'lda', 'lsa'} or ``sklearn.decomposition.<model>``)
n_topics (int, optional): number of topics in the model to be initialized
kwargs:
variety of parameters used to initialize the model; see individual
sklearn pages for full details
Raises:
ValueError: if ``model`` not in ``{'nmf', 'lda', 'lsa'}`` or is not an
NMF, LatentDirichletAllocation, or TruncatedSVD instance
Notes:
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html
"""
def __init__(self, model, n_topics=10, **kwargs):
    """Wrap a ready sklearn estimator, or build one from a model-name string."""
    if isinstance(model, (NMF, LatentDirichletAllocation, TruncatedSVD)):
        self.model = model
        return
    self.init_model(model, n_topics=n_topics, **kwargs)
def init_model(self, model, n_topics=10, **kwargs):
    """Instantiate the sklearn estimator named by ``model``.

    Args:
        model (str): one of 'nmf', 'lda', 'lsa'
        n_topics (int): number of components/topics for the estimator
        kwargs: per-estimator overrides; unknown keys are silently ignored

    Raises:
        ValueError: if ``model`` is not one of the three known names
    """
    if model == 'nmf':
        opts = dict(alpha=0.1, l1_ratio=0.5, max_iter=200,
                    random_state=1, shuffle=False)
        opts.update({k: kwargs[k] for k in opts if k in kwargs})
        self.model = NMF(n_components=n_topics, **opts)
    elif model == 'lda':
        # NOTE(review): sklearn >= 0.21 renamed ``n_topics`` to ``n_components``
        # on LatentDirichletAllocation — confirm the pinned sklearn version.
        opts = dict(max_iter=10, random_state=1, learning_method='online',
                    learning_offset=10.0, batch_size=128, n_jobs=1)
        opts.update({k: kwargs[k] for k in opts if k in kwargs})
        self.model = LatentDirichletAllocation(n_topics=n_topics, **opts)
    elif model == 'lsa':
        opts = dict(algorithm='randomized', n_iter=5, random_state=1)
        opts.update({k: kwargs[k] for k in opts if k in kwargs})
        self.model = TruncatedSVD(n_components=n_topics, **opts)
    else:
        msg = 'model "{}" invalid; must be {}'.format(
            model, {'nmf', 'lda', 'lsa'})
        raise ValueError(msg)
def save(self, filename):
    """Persist ``self.model`` to disk with joblib (compress level 3)."""
    joblib.dump(self.model, filename, compress=3)
    logger.info('{} model saved to {}'.format(self.model, filename))
@classmethod
def load(cls, filename):
    """Alternate constructor: load a joblib-saved model and wrap it."""
    model = joblib.load(filename)
    try:
        # old-style sklearn models carry ``n_topics``
        n_topics = model.n_topics
    except AttributeError:
        n_topics = model.n_components
    return cls(model, n_topics=n_topics)
def fit(self, doc_term_matrix):
    """Fit the underlying estimator on a document-term matrix."""
    self.model.fit(doc_term_matrix)
def partial_fit(self, doc_term_matrix):
    """Online-update the model; valid only for LatentDirichletAllocation."""
    if not isinstance(self.model, LatentDirichletAllocation):
        raise TypeError('only LatentDirichletAllocation models have partial_fit')
    self.model.partial_fit(doc_term_matrix)
def transform(self, doc_term_matrix):
    """Return the document-topic representation of ``doc_term_matrix``."""
    topic_matrix = self.model.transform(doc_term_matrix)
    return topic_matrix
@property
def n_topics(self):
    """Number of topics (``n_topics`` on LDA-era models, else ``n_components``)."""
    model = self.model
    if hasattr(model, 'n_topics'):
        return model.n_topics
    return model.n_components
def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
"""
Transform a document-term matrix into a document-topic matrix, where rows
correspond to documents and columns to the topics in the topic model.
Args:
doc_term_matrix (array-like or sparse matrix): corpus represented as a
document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
tf-weighting, while NMF and LSA may do better with tfidf-weighting!
normalize (bool, optional): if True, the values in each row are normalized,
i.e. topic weights on each document sum to 1
Returns:
``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
"""
doc_topic_matrix = self.transform(doc_term_matrix)
if normalize is True:
return doc_topic_matrix / np.sum(doc_topic_matrix, axis=1, keepdims=True)
#.........这里部分代码省略.........
Example 3: TopicModel
# Required import: from sklearn.decomposition import TruncatedSVD
# Or: from sklearn.decomposition.TruncatedSVD import partial_fit
class TopicModel(object):
"""
Args:
model ({'nmf', 'lda', 'lsa'} or ``sklearn.decomposition.<model>``)
n_topics (int, optional): number of topics in the model to be initialized
kwargs:
variety of parameters used to initialize the model; see individual
sklearn pages for full details
Raises:
ValueError: if ``model`` not in ``{'nmf', 'lda', 'lsa'}`` or is not an
NMF, LatentDirichletAllocation, or TruncatedSVD instance
Notes:
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html
- http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html
"""
def __init__(self, model, n_topics=10, **kwargs):
    """Accept a pre-built sklearn estimator, or a name to construct one from."""
    known_types = (NMF, LatentDirichletAllocation, TruncatedSVD)
    if isinstance(model, known_types):
        self.model = model
    else:
        self.init_model(model, n_topics=n_topics, **kwargs)
def init_model(self, model, n_topics=10, **kwargs):
    """Build the named sklearn estimator ('nmf', 'lda', or 'lsa').

    Args:
        model (str): estimator name
        n_topics (int): number of components/topics
        kwargs: optional per-estimator parameter overrides

    Raises:
        ValueError: for any other ``model`` value
    """
    get = kwargs.get
    if model == 'nmf':
        self.model = NMF(
            n_components=n_topics,
            alpha=get('alpha', 0.1),
            l1_ratio=get('l1_ratio', 0.5),
            max_iter=get('max_iter', 200),
            random_state=get('random_state', 1),
            shuffle=get('shuffle', False))
    elif model == 'lda':
        # NOTE(review): ``n_topics`` was renamed ``n_components`` in
        # sklearn 0.19 and removed in 0.21 — confirm the target version.
        self.model = LatentDirichletAllocation(
            n_topics=n_topics,
            max_iter=get('max_iter', 10),
            random_state=get('random_state', 1),
            learning_method=get('learning_method', 'online'),
            learning_offset=get('learning_offset', 10.0),
            batch_size=get('batch_size', 128),
            n_jobs=get('n_jobs', 1))
    elif model == 'lsa':
        self.model = TruncatedSVD(
            n_components=n_topics,
            algorithm=get('algorithm', 'randomized'),
            n_iter=get('n_iter', 5),
            random_state=get('random_state', 1))
    else:
        msg = 'model "{}" invalid; must be {}'.format(
            model, {'nmf', 'lda', 'lsa'})
        raise ValueError(msg)
def save(self, filename):
    """Write the wrapped model to ``filename`` using joblib."""
    joblib.dump(self.model, filename, compress=3)
    logger.info('{} model saved to {}'.format(self.model, filename))
@classmethod
def load(cls, filename):
    """Load a joblib-serialized model and wrap it in a new instance."""
    model = joblib.load(filename)
    if hasattr(model, 'n_topics'):
        topic_count = model.n_topics
    else:
        topic_count = model.n_components
    return cls(model, n_topics=topic_count)
def fit(self, doc_term_matrix):
    """Delegate training to the wrapped sklearn estimator."""
    self.model.fit(doc_term_matrix)
def partial_fit(self, doc_term_matrix):
    """Batch-update the model; NMF and LSA do not support partial fitting."""
    if not isinstance(self.model, LatentDirichletAllocation):
        raise TypeError('only LatentDirichletAllocation models have partial_fit')
    self.model.partial_fit(doc_term_matrix)
def transform(self, doc_term_matrix):
    """Map documents to topic weights through the wrapped estimator."""
    return self.model.transform(doc_term_matrix)
@property
def n_topics(self):
    """Topic count; falls back to ``n_components`` when ``n_topics`` is absent."""
    wrapped = self.model
    if hasattr(wrapped, 'n_topics'):
        return wrapped.n_topics
    return wrapped.n_components
def get_doc_topic_matrix(self, doc_term_matrix, normalize=True):
"""
Transform a document-term matrix into a document-topic matrix, where rows
correspond to documents and columns to the topics in the topic model.
Args:
doc_term_matrix (array-like or sparse matrix): corpus represented as a
document-term matrix with shape (n_docs, n_terms); NOTE: LDA expects
tf-weighting, while NMF and LSA may do better with tfidf-weighting!
normalize (bool, optional): if True, the values in each row are normalized,
i.e. topic weights on each document sum to 1
Returns:
``numpy.ndarray``: document-topic matrix with shape (n_docs, n_topics)
"""
doc_topic_matrix = self.transform(doc_term_matrix)
if normalize is True:
return doc_topic_matrix / np.sum(doc_topic_matrix, axis=1, keepdims=True)
#.........这里部分代码省略.........