本文整理汇总了 Python 中 sklearn.decomposition.LatentDirichletAllocation.partial_fit 方法的典型用法代码示例。如果您正苦于以下问题：Python LatentDirichletAllocation.partial_fit 方法的具体用法？Python LatentDirichletAllocation.partial_fit 怎么用？Python LatentDirichletAllocation.partial_fit 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.decomposition.LatentDirichletAllocation 的用法示例。
在下文中一共展示了LatentDirichletAllocation.partial_fit方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_lda_transform_mismatch
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
def test_lda_transform_mismatch():
    """Feeding data with a different feature count after fitting must fail.

    The model is first trained on a 10-column matrix and is then handed an
    8-column matrix; a ``ValueError`` whose message starts with
    "The provided data has" is expected.

    NOTE(review): the test name (and the original comment) mention
    ``transform``, but only ``partial_fit`` is exercised here -- confirm
    whether a ``transform`` check was intended.
    """
    seeded = np.random.RandomState(0)

    # The three draws below share one seeded stream, so their order matters.
    fitted_data = seeded.randint(4, size=(20, 10))
    narrower_data = seeded.randint(4, size=(10, 8))
    topic_count = seeded.randint(3, 6)

    model = LatentDirichletAllocation(n_topics=topic_count,
                                      random_state=seeded)
    model.partial_fit(fitted_data)

    assert_raises_regexp(
        ValueError,
        r"^The provided data has",
        model.partial_fit,
        narrower_data,
    )
示例2: test_lda_partial_fit_dim_mismatch
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
def test_lda_partial_fit_dim_mismatch():
    """`partial_fit` must reject a batch whose feature count changed.

    The model is trained on a matrix with ``n_col`` columns and is then given
    one with ``n_col + 1`` columns; a ``ValueError`` whose message starts with
    "The provided data has" is expected.
    """
    rng = np.random.RandomState(0)
    n_topics = rng.randint(3, 6)
    n_col = rng.randint(6, 10)

    # Draw the matrices from the seeded generator rather than the global
    # ``np.random`` state so that the test data is reproducible.
    X_1 = rng.randint(4, size=(10, n_col))
    X_2 = rng.randint(4, size=(10, n_col + 1))

    lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=5.,
                                    total_samples=20, random_state=rng)
    lda.partial_fit(X_1)
    assert_raises_regexp(ValueError, r"^The provided data has",
                         lda.partial_fit, X_2)
示例3: test_lda_partial_fit_multi_jobs
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
def test_lda_partial_fit_multi_jobs():
    """Online LDA training with ``n_jobs=-1`` recovers the expected topics.

    After three ``partial_fit`` passes over the matrix returned by
    ``_build_sparse_mtx()``, the three highest-weighted feature indices of
    every component must form one of the groups (0, 1, 2), (3, 4, 5) or
    (6, 7, 8).
    """
    rng = np.random.RandomState(0)
    n_topics, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_topics=n_topics, n_jobs=-1,
                                    learning_offset=5., total_samples=30,
                                    random_state=rng)

    # ``range`` instead of ``xrange``: identical for three iterations and
    # available on both Python 2 and Python 3.
    for _ in range(3):
        lda.partial_fit(X)

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for component in lda.components_:
        # argsort is ascending, so the last three entries are the top three;
        # sorting them makes the comparison order-independent.
        top_idx = tuple(sorted(component.argsort()[-3:]))
        assert_true(top_idx in correct_idx_grps)
示例4: test_lda_partial_fit
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
def test_lda_partial_fit():
    """Online learning through ``partial_fit`` recovers the expected topics.

    After three ``partial_fit`` passes over the matrix returned by
    ``_build_sparse_mtx()``, the three highest-weighted feature indices of
    every component must form one of the groups (0, 1, 2), (3, 4, 5) or
    (6, 7, 8).
    """
    rng = np.random.RandomState(0)
    n_topics, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=10.,
                                    total_samples=100, random_state=rng)

    # ``range`` instead of ``xrange``: identical for three iterations and
    # available on both Python 2 and Python 3.
    for _ in range(3):
        lda.partial_fit(X)

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for component in lda.components_:
        # argsort is ascending, so the last three entries are the top three;
        # sorting them makes the comparison order-independent.
        top_idx = tuple(sorted(component.argsort()[-3:]))
        assert_true(top_idx in correct_idx_grps)
示例5: range
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
# Incremental training loop: in every round the model is updated with
# ``partial_fit`` on each training split, and score / perplexity are recorded
# on both the training and the held-out part of that split.
#
# NOTE(review): ``train_scores``, ``max_iter``, ``valid_iter``,
# ``splited_index``, ``dataset`` and ``lda_model`` are not defined in this
# fragment and must already exist before it runs.
# NOTE(review): ``splited_index`` is iterated once per round; if it is a
# one-shot iterator it will be empty after the first round -- confirm that it
# is a re-iterable sequence.
test_scores = []  # size: (max_iter / valid_iter) * (n_splits)
train_perplexities = []  # size: (max_iter / valid_iter) * (n_splits)
test_perplexities = []  # size: (max_iter / valid_iter) * (n_splits)

for i in range(int(max_iter / valid_iter)):
    train_s = []
    test_s = []
    train_p = []
    test_p = []

    # Single-argument print(...) calls emit the same text as the original
    # Python 2 print statements and are also valid on Python 3.
    print('\ntraining  {} -th iteration'.format(i * valid_iter + 1))

    for train_index, test_index in splited_index:
        train_data, test_data = dataset[train_index], dataset[test_index]
        lda_model.partial_fit(train_data)
        train_s.append(lda_model.score(train_data))
        test_s.append(lda_model.score(test_data))
        train_p.append(lda_model.perplexity(train_data))
        test_p.append(lda_model.perplexity(test_data))

    train_scores.append(train_s)
    test_scores.append(test_s)
    train_perplexities.append(train_p)
    test_perplexities.append(test_p)

    print('train_scores:  {}  test_scores:  {}  train_perplexities:  {}'
          '  test_perplexities:  {}'.format(
              train_scores[i], test_scores[i],
              train_perplexities[i], test_perplexities[i]))
示例6: ScikitLda
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import partial_fit [as 别名]
class ScikitLda(object):
    """Pair a scikit-learn LDA model with a corpus object.

    The corpus only needs to provide ``sparse_matrix()`` and ``word(i)``;
    those are the only corpus methods called by this class. Row-normalised
    document-topic weights of the bound corpus are cached in ``_weights``
    and discarded whenever the model is retrained.
    """

    def __init__(self, corpus=None, lda=None, n_topics=10,
                 max_iter=5, learning_method='online', learning_offset=50.,
                 **kwargs):
        """Wrap an existing model or build a new one.

        When ``lda`` is given it is used as-is and the model parameters
        (``n_topics``, ``max_iter``, ``learning_method``,
        ``learning_offset``, ``kwargs``) are ignored; otherwise they are
        forwarded to ``LatentDirichletAllocation``.
        """
        if lda is None:
            lda = LatentDirichletAllocation(
                n_topics=n_topics, max_iter=max_iter,
                learning_method=learning_method,
                learning_offset=learning_offset, **kwargs)
        self.lda = lda
        self._corpus = corpus
        self._weights = None

    def fit(self):
        """Fit the model on the bound corpus and drop the cached weights."""
        self.lda.fit(self.corpus.sparse_matrix())
        # The model changed, so previously cached weights are stale
        # (mirrors what partial_fit does).
        self._weights = None

    def partial_fit(self, corpus):
        """Update the model with ``corpus`` and drop the cached weights."""
        self.lda.partial_fit(corpus.sparse_matrix())
        self._weights = None

    @property
    def topics(self):
        """The ``components_`` array of the wrapped model."""
        return self.lda.components_

    @property
    def n_topics(self):
        """The ``n_topics`` attribute of the wrapped model."""
        return self.lda.n_topics

    @property
    def corpus(self):
        """The corpus this wrapper was created with (may be ``None``)."""
        return self._corpus

    @property
    def weights(self):
        """Row-normalised topic weights of the bound corpus (cached)."""
        if self._weights is None:
            self._weights = self.partial_weights(self.corpus)
        return self._weights

    def partial_weights(self, corpus):
        """Return the transform of ``corpus`` with every row summing to 1."""
        raw = self.transform(corpus)
        # Transposing lets the per-row sums broadcast along the rows.
        return (raw.T / raw.sum(axis=1)).T

    def transform(self, corpus):
        """Return the wrapped model's transform of ``corpus``."""
        return self.lda.transform(corpus.sparse_matrix())

    def topic_words(self, n_words=10):
        """Return the ``n_words`` highest-weighted words of every topic.

        Returns a pair ``(words, weighted_words)``. ``words`` holds one list
        of words per topic; ``weighted_words`` holds one list of
        ``(weight, word)`` tuples per topic, where ``weight`` is the word's
        share of the topic's total weight. Both are ordered from the
        heaviest word down.
        """
        topic_word_lists = []
        topic_weighted_lists = []
        for topic in self.topics:
            top_idx = topic.argsort()[::-1][:n_words]
            shares = topic / topic.sum()
            topic_word_lists.append([self.corpus.word(i) for i in top_idx])
            topic_weighted_lists.append(
                [(shares[i], self.corpus.word(i)) for i in top_idx])
        return (topic_word_lists, topic_weighted_lists)

    def save(self, filename):
        """Dump the wrapped model (not the corpus) to ``filename``."""
        joblib.dump(self.lda, filename)

    @classmethod
    def load(cls, filename, corpus=None):
        """Create a wrapper around a model previously written by ``save``.

        SECURITY: ``joblib.load`` unpickles the file and can execute
        arbitrary code; only load files from trusted sources.
        """
        lda = joblib.load(filename)
        return cls(lda=lda, corpus=corpus)