本文整理汇总了 Python 中 sklearn.decomposition.LatentDirichletAllocation.perplexity 方法的典型用法代码示例。如果您正苦于以下问题：Python LatentDirichletAllocation.perplexity 方法的具体用法？Python LatentDirichletAllocation.perplexity 怎么用？Python LatentDirichletAllocation.perplexity 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.decomposition.LatentDirichletAllocation 的用法示例。
在下文中一共展示了 LatentDirichletAllocation.perplexity 方法的 13 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: lda_tuner
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def _score_lda_priors(X, topics, dtp, twp):
    """Fit LDA on a random half of ``X`` and evaluate it on the other half.

    Returns the ``(score, perplexity)`` pair reported by the fitted model
    for the held-out half.
    """
    # No random_state is given to the split, so each call draws a new one.
    X_train, X_test = train_test_split(X, test_size=0.5)
    # ``n_components`` is the current name of the former ``n_topics`` keyword.
    lda = LatentDirichletAllocation(n_components=topics,
                                    doc_topic_prior=dtp,
                                    topic_word_prior=twp,
                                    learning_method='batch',
                                    random_state=42,
                                    max_iter=20)
    lda.fit(X_train)
    return lda.score(X_test), lda.perplexity(X_test)


def lda_tuner(ingroup_otu, best_models):
    """Grid-search the doc-topic and topic-word priors of an LDA model.

    For every topic count in a fixed list (currently only 3) each
    combination of the two prior grids is evaluated on a fresh 50/50
    train/test split. After the grid, one extra run uses ``1 / topics`` for
    both priors. Progress is printed, including a message whenever the
    held-out score beats the best seen so far.

    Parameters
    ----------
    ingroup_otu
        Object whose ``.values`` attribute is the matrix handed to LDA
        (presumably a pandas DataFrame -- confirm against the caller).
    best_models : list
        Mutated in place: one dict with keys ``'n'``, ``'dtp'``, ``'twp'``,
        ``'score'`` and ``'perp'`` is appended per evaluated setting.

    Returns
    -------
    list
        The same ``best_models`` list that was passed in.
    """
    best_score = -np.inf
    dtp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    twp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    topic_series = [3]
    X = ingroup_otu.values
    eval_counter = 0

    for topics in topic_series:
        for dtp in dtp_series:
            for twp in twp_series:
                eval_counter += 1
                this_score, this_perplexity = _score_lda_priors(
                    X, topics, dtp, twp)
                if this_score > best_score:
                    best_score = this_score
                    print("New Max Likelihood: {}".format(best_score))
                print("#{}: n:{}, dtp:{}, twp:{}, score:{}, perp:{}".format(
                    eval_counter, topics, dtp, twp,
                    this_score, this_perplexity))
                best_models.append({'n': topics, 'dtp': dtp, 'twp': twp,
                                    'score': this_score,
                                    'perp': this_perplexity})

        # The original guarded this run with "dtp and twp are both the last
        # grid value", i.e. it ran exactly once per topic count right after
        # the last grid point; running it after the grid loops is equivalent.
        symmetric_prior = 1. / topics
        eval_counter += 1
        this_score, this_perplexity = _score_lda_priors(
            X, topics, symmetric_prior, symmetric_prior)
        if this_score > best_score:
            best_score = this_score
            print("New Max Likelihood: {}".format(best_score))
        print("#{}: n:{}, dtp:{}, twp:{}, score:{} perp: {}".format(
            eval_counter, topics, symmetric_prior, symmetric_prior,
            this_score, this_perplexity))
        best_models.append({'n': topics, 'dtp': symmetric_prior,
                            'twp': symmetric_prior, 'score': this_score,
                            'perp': this_perplexity})
    return best_models
示例2: test_perplexity_input_format
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def test_perplexity_input_format():
    """Perplexity must agree for sparse input and its dense copy.

    A single model is fitted on the sparse matrix returned by
    ``_build_sparse_mtx`` and then scored twice: once on that matrix and
    once on ``X.toarray()``.
    """
    n_components, X = _build_sparse_mtx()
    model_params = dict(n_components=n_components, max_iter=1,
                        learning_method='batch',
                        total_samples=100, random_state=0)
    model = LatentDirichletAllocation(**model_params)
    model.fit(X)

    sparse_perplexity = model.perplexity(X)
    dense_perplexity = model.perplexity(X.toarray())
    assert_almost_equal(sparse_perplexity, dense_perplexity)
示例3: plot_perplexity_topics
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def plot_perplexity_topics(A_tfidf):
    """Plot and save LDA perplexity as a function of the number of topics.

    One online-learning LDA model is fitted on ``A_tfidf`` for each of five
    log-spaced topic counts between 10 and 100. Perplexity is evaluated on
    the same matrix the model was trained on.

    Side effects: prints timing per model, writes
    ``./data/perplexity_topics.npy`` and ``./data/perplexity_topics2.npy``,
    shows the figure and saves it to ``./figures/perplexity_topics.png``.

    Parameters
    ----------
    A_tfidf
        Document-term matrix passed to ``fit`` and ``perplexity``
        (presumably TF-IDF weighted, judging by the name -- confirm).
    """
    print("computing perplexity vs K...")
    max_iter = 5  # based on plot_perplexity_iter()
    # ``np.int`` was only an alias of the builtin ``int`` and no longer
    # exists in current NumPy; ``int`` gives the same dtype.
    num_topics = np.logspace(1, 2, 5).astype(int)
    perplexity = []
    for k in num_topics:
        # ``n_components`` is the current name of the former ``n_topics``.
        lda = LatentDirichletAllocation(n_components=int(k),
                                        max_iter=max_iter,
                                        learning_method='online',
                                        batch_size=512,
                                        random_state=0,
                                        n_jobs=-1)
        tic = time()
        lda.fit(A_tfidf)  # online VB
        toc = time()
        print("K= %d, elapsed time: %.4f sec" % (k, toc - tic))
        perplexity.append(lda.perplexity(A_tfidf))

    np.save('./data/perplexity_topics.npy', perplexity)
    np.save('./data/perplexity_topics2.npy', num_topics)

    f = plt.figure()
    plt.plot(num_topics, perplexity, color='b', marker='o', lw=2.0,
             label='perplexity')
    plt.title('Perplexity (LDA, online VB)')
    plt.xlabel('Number of Topics, K')
    plt.ylabel('Perplexity')
    plt.grid(True)
    plt.legend()
    plt.show()
    f.savefig('./figures/perplexity_topics.png')
示例4: plot_perplexity_batch
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def plot_perplexity_batch(A_tfidf, num_docs):
    """Plot and save LDA perplexity against EM iterations per batch size.

    For each mini-batch size in 64, 128, 256, 512, 1024 and each
    ``max_iter`` from 1 to 5, a fresh online-learning LDA model with 10
    topics is fitted on ``A_tfidf``. Its perplexity on that same matrix and
    its ``n_batch_iter_`` counter are recorded.

    Side effects: prints timing per fit, writes ``./data/perplexity.npy``
    and ``./data/em_iter.npy``, shows the figure and saves it to
    ``./figures/perplexity_batch.png``.

    Parameters
    ----------
    A_tfidf
        Document-term matrix passed to ``fit`` and ``perplexity``.
    num_docs
        Not used by this function; kept so existing callers keep working.
    """
    print("computing perplexity vs batch size...")
    max_iter = 5
    num_topics = 10
    batch_size = np.logspace(6, 10, 5, base=2).astype(int)
    # Row = batch size, column = number of sweeps minus one.
    perplexity = np.zeros((len(batch_size), max_iter))
    em_iter = np.zeros((len(batch_size), max_iter))

    for ii, mini_batch in enumerate(batch_size):
        for jj, sweep in enumerate(range(1, max_iter + 1)):
            # ``n_components`` is the current name of the former
            # ``n_topics`` keyword.
            lda = LatentDirichletAllocation(n_components=num_topics,
                                            max_iter=sweep,
                                            learning_method='online',
                                            batch_size=int(mini_batch),
                                            random_state=0,
                                            n_jobs=-1)
            tic = time()
            lda.fit(A_tfidf)  # online VB
            toc = time()
            print("sweep %d, elapsed time: %.4f sec" % (sweep, toc - tic))
            perplexity[ii, jj] = lda.perplexity(A_tfidf)
            em_iter[ii, jj] = lda.n_batch_iter_

    np.save('./data/perplexity.npy', perplexity)
    np.save('./data/em_iter.npy', em_iter)

    f = plt.figure()
    for mb, size in enumerate(batch_size):
        # Each curve gets a random RGB colour, so colours differ per run.
        plt.plot(em_iter[mb, :], perplexity[mb, :],
                 color=np.random.rand(3,), marker='o', lw=2.0,
                 label='mini_batch: ' + str(size))
    plt.title('Perplexity (LDA, online VB)')
    plt.xlabel('EM iter')
    plt.ylabel('Perplexity')
    plt.grid(True)
    plt.legend()
    plt.show()
    f.savefig('./figures/perplexity_batch.png')
示例5: plot_perplexity_iter
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def plot_perplexity_iter(A_tfidf, num_topics):
    """Plot and save LDA perplexity against the number of EM iterations.

    For ``max_iter`` from 1 to 5 a fresh online-learning LDA model (batch
    size 512) is fitted on ``A_tfidf``. Its perplexity on that same matrix
    and its ``n_batch_iter_`` counter are recorded.

    Side effects: prints timing per fit, writes
    ``./data/perplexity_iter.npy``, shows the figure and saves it to
    ``./figures/perplexity_iter.png``.

    Parameters
    ----------
    A_tfidf
        Document-term matrix passed to ``fit`` and ``perplexity``.
    num_topics
        Number of topics for every fitted model.
    """
    print("computing perplexity vs iter...")
    max_iter = 5
    perplexity = []
    em_iter = []
    for sweep in range(1, max_iter + 1):
        # ``n_components`` is the current name of the former ``n_topics``.
        lda = LatentDirichletAllocation(n_components=num_topics,
                                        max_iter=sweep,
                                        learning_method='online',
                                        batch_size=512,
                                        random_state=0,
                                        n_jobs=-1)
        tic = time()
        lda.fit(A_tfidf)  # online VB
        toc = time()
        print("sweep %d, elapsed time: %.4f sec" % (sweep, toc - tic))
        perplexity.append(lda.perplexity(A_tfidf))
        em_iter.append(lda.n_batch_iter_)

    np.save('./data/perplexity_iter.npy', perplexity)

    f = plt.figure()
    plt.plot(em_iter, perplexity, color='b', marker='o', lw=2.0,
             label='perplexity')
    plt.title('Perplexity (LDA, online VB)')
    plt.xlabel('EM iter')
    plt.ylabel('Perplexity')
    plt.grid(True)
    plt.legend()
    plt.show()
    f.savefig('./figures/perplexity_iter.png')
示例6: test_lda_perplexity
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def test_lda_perplexity():
    """Perplexity after ten passes must not exceed perplexity after one.

    The check runs for both the ``'online'`` and ``'batch'`` learning
    methods, with and without sub-sampling in ``perplexity``.
    """
    n_topics, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        # ``n_components`` is the current name of the former ``n_topics``
        # constructor keyword.
        lda_1 = LatentDirichletAllocation(n_components=n_topics, max_iter=1,
                                          learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_components=n_topics, max_iter=10,
                                          learning_method=method,
                                          total_samples=100, random_state=0)

        # ``perplexity`` no longer accepts a precomputed document-topic
        # distribution as its second argument, so the models are only
        # fitted here and ``perplexity`` is called with ``X`` alone.
        lda_1.fit(X)
        perp_1 = lda_1.perplexity(X, sub_sampling=False)

        lda_2.fit(X)
        perp_2 = lda_2.perplexity(X, sub_sampling=False)
        assert_greater_equal(perp_1, perp_2)

        perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True)
        perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True)
        assert_greater_equal(perp_1_subsampling, perp_2_subsampling)
示例7: test_lda_score_perplexity
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def test_lda_score_perplexity():
    """Check the relationship between LDA ``score`` and ``perplexity``.

    The value returned by ``perplexity(X, sub_sampling=False)`` must match
    ``exp(-score(X) / sum(X.data))`` computed by hand.
    """
    n_components, X = _build_sparse_mtx()
    model = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=10, random_state=0)
    model.fit(X)

    reported_perplexity = model.perplexity(X, sub_sampling=False)

    # Rebuild the same quantity from the score and the sum of the stored
    # entries of the sparse matrix.
    score = model.score(X)
    total_count = np.sum(X.data)
    derived_perplexity = np.exp(-1. * (score / total_count))

    assert_almost_equal(reported_perplexity, derived_perplexity)
示例8: test_lda_perplexity
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def test_lda_perplexity(method):
    """Perplexity after ten passes must not exceed perplexity after one.

    ``method`` is forwarded as ``learning_method``. How it is supplied
    (e.g. by test parametrization) is not visible in this snippet. The
    comparison is made both without and with sub-sampling.
    """
    n_components, X = _build_sparse_mtx()
    common = dict(n_components=n_components, learning_method=method,
                  total_samples=100, random_state=0)
    short_run = LatentDirichletAllocation(max_iter=1, **common)
    long_run = LatentDirichletAllocation(max_iter=10, **common)

    short_run.fit(X)
    short_perplexity = short_run.perplexity(X, sub_sampling=False)
    long_run.fit(X)
    long_perplexity = long_run.perplexity(X, sub_sampling=False)
    assert_greater_equal(short_perplexity, long_perplexity)

    # Same comparison with sub-sampling switched on.
    short_subsampled = short_run.perplexity(X, sub_sampling=True)
    long_subsampled = long_run.perplexity(X, sub_sampling=True)
    assert_greater_equal(short_subsampled, long_subsampled)
示例9: test_lda_fit_perplexity
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
def test_lda_fit_perplexity():
    """Perplexity recorded during ``fit`` must match ``perplexity(X)``.

    With ``evaluate_every=1`` the value stored in ``bound_`` after fitting
    is compared against the ``perplexity`` method on the training set.
    """
    n_components, X = _build_sparse_mtx()
    model = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=1,
        learning_method='batch',
        random_state=0,
        evaluate_every=1,
    )
    model.fit(X)

    fit_time_perplexity = model.bound_  # stored at the end of fit
    train_set_perplexity = model.perplexity(X)  # recomputed on train data

    assert_almost_equal(fit_time_perplexity, train_set_perplexity)
示例10: range
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
# Incrementally train ``lda_model`` and record, per round and per split, the
# score and perplexity on the train and test parts. All names used here
# (max_iter, valid_iter, splited_index, dataset, lda_model, the four result
# lists, dict_num_topic, n_component) are defined outside this excerpt.
for i in range(int(max_iter / valid_iter)):
    train_s = []
    test_s = []
    train_p = []
    test_p = []
    # Single-argument print() produces the same text on Python 2 and 3.
    print('\ntraining  {} -th iteration'.format(i * valid_iter + 1))
    # NOTE(review): splited_index is re-iterated on every round, so it must
    # be a reusable collection rather than a one-shot generator -- confirm.
    for train_index, test_index in splited_index:
        train_data, test_data = dataset[train_index], dataset[test_index]
        lda_model.partial_fit(train_data)
        train_s.append(lda_model.score(train_data))
        test_s.append(lda_model.score(test_data))
        train_p.append(lda_model.perplexity(train_data))
        test_p.append(lda_model.perplexity(test_data))
    train_scores.append(train_s)
    test_scores.append(test_s)
    train_perplexities.append(train_p)
    test_perplexities.append(test_p)
    print("train_scores:  {}  test_scores:  {}  train_perplexities:  {}"
          "  test_perplexities:  {}".format(train_scores[i],
                                            test_scores[i],
                                            train_perplexities[i],
                                            test_perplexities[i]))

# NOTE(review): the excerpt lost its indentation, so it is unclear whether
# this assignment sat inside the loop. It stores references to the same
# lists, so the final contents match either way once the loop has run.
dict_num_topic[str(n_component) + '_topics'] = {
    "max_iter": max_iter, "valid_iter": valid_iter,
    "train_scores": train_scores, "test_scores": test_scores,
    "train_perplexities": train_perplexities,
    "test_perplexities": test_perplexities,
}
示例11: LatentDirichletAllocation
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
# Fit LDA for a range of topic counts on the vectorised article bodies and
# collect the score and perplexity of each model on that same matrix.
# ``tf_vectorizer``, ``blogs`` and ``pd`` are defined outside this excerpt.
tf = tf_vectorizer.fit_transform(blogs.article_body)

lda_eval2 = []
ldaRANGE = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 300]
for n in ldaRANGE:
    # ``n_components`` is the current name of the former ``n_topics``.
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0)
    lda.fit(tf)
    score = lda.score(tf)
    perplexity = lda.perplexity(tf)
    # Single-argument print() produces the same text on Python 2 and 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval2.append({'topics': n, 'score': score, 'perplexity': perplexity})

for item in lda_eval2:
    print(item)

lda_eval22 = pd.DataFrame(lda_eval2)
lda_eval22  # bare expression: only displays something in a notebook/REPL
import matplotlib.pyplot as plt
lda_eval22  # bare expression, as above
plt.style.use('ggplot')
plt.scatter(lda_eval22['topics'], lda_eval22['perplexity'])
示例12: LatentDirichletAllocation
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
# Fit a 10-topic LDA on the article corpus, print its top words, then build
# the vectorised title corpus and a second, still unfitted, model for it.
# ``corpusVect``, ``vectorizer``, ``print_top_words``, ``dataWeek``,
# ``CleanTextTransformer`` and ``pd`` are defined outside this excerpt.
n_features = 1000
n_topics = 10
n_top_words = 20

# ``n_components`` is the current name of the former ``n_topics`` keyword;
# the local variable keeps its name.
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                learning_method='online',
                                learning_offset=50.,
                                random_state=0)
lda.fit(corpusVect)
# NOTE(review): newer scikit-learn releases replace get_feature_names()
# with get_feature_names_out(); left unchanged so older releases still work.
tf_feature_names = vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)
lda.score(corpusVect)       # result discarded; only shown in a notebook
lda.perplexity(corpusVect)  # result discarded; only shown in a notebook

#### Titles
corp2 = dataWeek.title
# NOTE(review): fit() and transform() are called on two separate
# CleanTextTransformer instances, so nothing learned by fit() can reach
# transform() -- presumably the transformer is stateless; confirm.
CleanTextTransformer().fit(corp2)
corpCTT2 = CleanTextTransformer().transform(corp2)
corpCTTvect = vectorizer.fit_transform(corpCTT2)
corpusTitlesVect = pd.DataFrame(corpCTTvect.todense(),
                                columns=vectorizer.get_feature_names())
lda2 = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                 learning_method='online',
                                 learning_offset=50.,
                                 random_state=0)
for n in range(2,16):
示例13: range
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import perplexity [as 别名]
# Build a dense document-term frame, sweep the LDA topic count from 1 to 19
# while recording score and perplexity, then fit a final 5-topic model and
# inspect one document. ``vectorizer``, ``X``, ``df`` and ``pd`` are defined
# outside this excerpt. Bare expressions only display something when run in
# a notebook/REPL.
vectorizer.get_feature_names()
# The feature names are passed as a flat list; wrapping them in another
# list, as the excerpt did, makes pandas build MultiIndex columns.
vect_df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names())
vect_df.shape
vect_df.head()

lda_range = range(1, 20)
lda_eval = []
for n in lda_range:
    # ``n_components`` is the current name of the former ``n_topics``.
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0)
    lda.fit(vect_df)
    score = lda.score(vect_df)
    perplexity = lda.perplexity(vect_df)
    # Single-argument print() produces the same text on Python 2 and 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval.append({'topics': n, 'score': score, 'perplexity': perplexity})

for item in lda_eval:
    print(item)

lda = LatentDirichletAllocation(n_components=5, n_jobs=-1)
topics = lda.fit_transform(vect_df)
lda.perplexity(vect_df)
lda.score(vect_df)
topics[2545]
# ``DataFrame.ix`` no longer exists in pandas. ``topics[2545]`` is a
# positional lookup, so the matching row is fetched by position too.
# NOTE(review): this assumes df's rows are in the same order as vect_df's.
df.iloc[2545].text