本文整理汇总了 Python 中 sklearn.decomposition.LatentDirichletAllocation.score 方法的典型用法代码示例。如果您正苦于以下问题：Python LatentDirichletAllocation.score 方法的具体用法？Python LatentDirichletAllocation.score 怎么用？Python LatentDirichletAllocation.score 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.decomposition.LatentDirichletAllocation 的用法示例。
在下文中一共展示了LatentDirichletAllocation.score方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: lda_tuner
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def _score_lda_candidate(X, topics, dtp, twp):
    """Fit one LDA model on a random half of X; return (score, perplexity) on the other half."""
    # The split is intentionally left unseeded, as in the original code, so
    # every candidate sees a different random 50/50 partition of the rows.
    X_train, X_test = train_test_split(X, test_size=0.5)
    lda = LatentDirichletAllocation(n_components=topics,
                                    doc_topic_prior=dtp,
                                    topic_word_prior=twp,
                                    learning_method='batch',
                                    random_state=42,
                                    max_iter=20)
    lda.fit(X_train)
    return lda.score(X_test), lda.perplexity(X_test)


def lda_tuner(ingroup_otu, best_models):
    """Grid-search LDA priors and record held-out score/perplexity per setting.

    For each topic count in the hard-coded series, every combination of
    document-topic prior and topic-word prior is evaluated, followed by one
    extra run with both priors set to ``1 / topics``. Progress is printed
    for every evaluation, and a "New Max Likelihood" line is printed
    whenever the held-out score improves on the best seen so far.

    Args:
        ingroup_otu: Object exposing the data matrix as ``.values``
            (presumably a pandas DataFrame of counts -- confirm with caller).
        best_models: List that receives one result dict per evaluation.
            It is mutated in place. Despite the name, every evaluated
            setting is appended, not only the improving ones.

    Returns:
        The same ``best_models`` list, extended with dicts keyed
        ``'n'``, ``'dtp'``, ``'twp'``, ``'score'`` and ``'perp'``.
    """
    best_score = -np.inf
    dtp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    twp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    topic_series = [3]
    X = ingroup_otu.values
    eval_counter = 0

    # The two report lines differ slightly in punctuation; both are kept
    # exactly as they were so that any downstream log parsing still works.
    grid_report = "#{}: n:{}, dtp:{}, twp:{}, score:{}, perp:{}"
    symmetric_report = "#{}: n:{}, dtp:{}, twp:{}, score:{} perp: {}"

    for topics in topic_series:
        symmetric_prior = 1. / topics
        candidates = [(dtp, twp, grid_report)
                      for dtp in dtp_series
                      for twp in twp_series]
        # The symmetric-prior run comes last, right after the final grid cell.
        candidates.append((symmetric_prior, symmetric_prior, symmetric_report))

        for dtp, twp, report in candidates:
            eval_counter += 1
            this_score, this_perplexity = _score_lda_candidate(
                X, topics, dtp, twp)
            if this_score > best_score:
                best_score = this_score
                print("New Max Likelihood: {}".format(best_score))
            print(report.format(eval_counter, topics, dtp, twp,
                                this_score, this_perplexity))
            best_models.append({'n': topics, 'dtp': dtp, 'twp': twp,
                                'score': this_score, 'perp': this_perplexity})
    return best_models
示例2: test_lda_score
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def test_lda_score():
    # Test LDA score for both online and batch training:
    # a model trained for 10 iterations must score at least as high on the
    # training data as an identically seeded model trained for 1 iteration.
    n_components, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        lda_1 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_1.fit_transform(X)
        score_1 = lda_1.score(X)

        lda_2.fit_transform(X)
        score_2 = lda_2.score(X)
        assert_greater_equal(score_2, score_1)
示例3: topicmodel
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def topicmodel(comments):
    """Group comment texts by their dominant LDA topic.

    The texts are vectorized into term counts, one LDA model is fitted for
    each topic count from 2 to 9, and the model with the highest score on
    the same data is kept. The top five terms of each topic of the chosen
    model are printed for debugging.

    Args:
        comments: Iterable of mappings, each with a ``'text'`` entry.

    Returns:
        A ``collections.defaultdict(list)`` mapping the index of the
        highest-weighted topic to the UTF-8 encoded texts assigned to it.
    """
    raw_texts = [c['text'] for c in comments]

    tf_vectorizer = CountVectorizer(
        max_df=.20,
        min_df=10,
        stop_words=stopwords)
    counts = tf_vectorizer.fit_transform(raw_texts)

    # Test between 2 and 9 topics and keep the best-scoring model.
    best_model = None
    best_ll = None
    for k in range(2, 10):
        print("Testing {}".format(k))
        model = LatentDirichletAllocation(
            n_components=k,
            max_iter=5,
            learning_method='batch',
            learning_offset=50.,
            random_state=0
        )
        model.fit(counts)
        ll = model.score(counts)
        # ">=" keeps the later model on an exact tie, matching the earlier
        # behavior where models were stored in a dict keyed by score.
        if best_ll is None or ll >= best_ll:
            best_ll = ll
            best_model = model

    # For debugging: print the top five terms of each topic in the chosen model.
    feature_names = tf_vectorizer.get_feature_names()
    for topic_idx, weights in enumerate(best_model.components_):
        print("Topic #%d:" % topic_idx)
        top_terms = [feature_names[i] for i in weights.argsort()[:-5 - 1:-1]]
        # Joined as text: mixing UTF-8 encoded bytes with a str separator
        # raises TypeError on Python 3.
        print(" ".join(top_terms))
        print("")

    ret = collections.defaultdict(list)
    for i, doc_topics in enumerate(best_model.transform(counts)):
        dominant = numpy.argmax(doc_topics)
        ret[dominant].append(raw_texts[i].encode('utf8'))
    return ret
示例4: test_lda_score_perplexity
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def test_lda_score_perplexity():
    # Check that the perplexity reported by the model matches the value
    # derived from its score: exp(-score / sum of the stored entries of X).
    n_components, X = _build_sparse_mtx()
    model = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=10, random_state=0)
    model.fit(X)

    reported = model.perplexity(X, sub_sampling=False)

    score_value = model.score(X)
    total_count = np.sum(X.data)
    derived = np.exp(-1. * (score_value / total_count))

    assert_almost_equal(reported, derived)
示例5: range
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
test_perplexities = []  # size: (max_iter / valid_iter) * (n_splits)

# Each outer pass runs partial_fit once per split and records the score and
# perplexity on both the training and the held-out part of that split.
for i in range(int(max_iter / valid_iter)):
    train_s, test_s = [], []
    train_p, test_p = [], []
    print("\ntraining  {} -th iteration".format(i * valid_iter + 1))

    # NOTE(review): splited_index is iterated again on every outer pass, so
    # it has to be re-iterable (e.g. a list) rather than a one-shot
    # generator -- confirm where it is built.
    for train_index, test_index in splited_index:
        train_data, test_data = dataset[train_index], dataset[test_index]
        lda_model.partial_fit(train_data)
        train_s.append(lda_model.score(train_data))
        test_s.append(lda_model.score(test_data))
        train_p.append(lda_model.perplexity(train_data))
        test_p.append(lda_model.perplexity(test_data))

    train_scores.append(train_s)
    test_scores.append(test_s)
    train_perplexities.append(train_p)
    test_perplexities.append(test_p)

    # Single-argument print keeps the output text identical on Python 2 and 3.
    print("train_scores:  {}  test_scores:  {}  train_perplexities:  {}"
          "  test_perplexities:  {}".format(train_scores[i], test_scores[i],
                                           train_perplexities[i],
                                           test_perplexities[i]))
dict_num_topic[str(n_component) + '_topics'] = {
"max_iter": max_iter, "valid_iter": valid_iter,
示例6: LatentDirichletAllocation
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
stop_words='english')
# Term-count matrix for the article bodies.
tf = tf_vectorizer.fit_transform(blogs.article_body)

# Fit one LDA model per candidate topic count and record its score and
# perplexity on the same data it was trained on.
lda_eval2 = []
ldaRANGE = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 300]
for n in ldaRANGE:
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0)
    lda.fit(tf)
    score = lda.score(tf)
    perplexity = lda.perplexity(tf)
    # Single-argument print keeps the output identical on Python 2 and 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval2.append({'topics': n, 'score': score, 'perplexity': perplexity})

for item in lda_eval2:
    print(item)

# Tabulate the results; the bare expressions below only display the frame
# when run interactively (e.g. in a notebook).
lda_eval22 = pd.DataFrame(lda_eval2)
lda_eval22
import matplotlib.pyplot as plt
lda_eval22
plt.style.use('ggplot')
示例7: LatentDirichletAllocation
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
n_samples = 2000
n_features = 1000
n_topics = 10
n_top_words = 20

# Fit LDA on the already vectorized corpus and show the top words per topic.
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                learning_method='online',
                                learning_offset=50.,
                                random_state=0)
lda.fit(corpusVect)
tf_feature_names = vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)

# Bare expressions: the values are only displayed when run interactively.
lda.score(corpusVect)
lda.perplexity(corpusVect)

#### Titles
corp2 = dataWeek.title
# NOTE(review): fit() and transform() are called on two different
# CleanTextTransformer instances, so anything learned by fit() is discarded.
# That is harmless only if the transformer is stateless -- confirm.
CleanTextTransformer().fit(corp2)
corpCTT2 = CleanTextTransformer().transform(corp2)
# Re-fitting the shared vectorizer replaces its vocabulary with the titles'.
corpCTTvect = vectorizer.fit_transform(corpCTT2)
corpusTitlesVect = pd.DataFrame(corpCTTvect.todense(),
                                columns=vectorizer.get_feature_names())
lda2 = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                 learning_method='online',
                                 learning_offset=50.,
                                 random_state=0)
示例8: range
# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
# Term-count matrix for the texts, wrapped in a DataFrame with one column
# per vocabulary term.
X = vectorizer.fit_transform(df.text)
vectorizer.get_feature_names()
# The names are passed as a flat list: wrapping them in another list makes
# pandas build a one-level MultiIndex instead of plain column labels.
vect_df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names())
vect_df.shape
vect_df.head()

# Fit one LDA model per topic count from 1 to 19 and record its score and
# perplexity on the same data it was trained on.
lda_range = range(1, 20)
lda_eval = []
for n in lda_range:
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online',
                                    learning_offset=50.,
                                    random_state=0)
    lda.fit(vect_df)
    score = lda.score(vect_df)
    perplexity = lda.perplexity(vect_df)
    # Single-argument print keeps the output identical on Python 2 and 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval.append({'topics': n, 'score': score, 'perplexity': perplexity})

for item in lda_eval:
    print(item)

# Final model with 5 topics; the bare expressions below only display their
# values when run interactively.
lda = LatentDirichletAllocation(n_components=5, n_jobs=-1)
topics = lda.fit_transform(vect_df)
lda.perplexity(vect_df)
lda.score(vect_df)
topics[2545]
# Positional lookup, so it addresses the same row as topics[2545]. This
# equals the removed `.ix` lookup when df has a default integer index.
df.iloc[2545].text