当前位置: 首页>>代码示例>>Python>>正文


Python LatentDirichletAllocation.score方法代码示例

本文整理汇总了Python中sklearn.decomposition.LatentDirichletAllocation.score方法的典型用法代码示例。如果您正苦于以下问题:Python LatentDirichletAllocation.score方法的具体用法?Python LatentDirichletAllocation.score怎么用?Python LatentDirichletAllocation.score使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.decomposition.LatentDirichletAllocation的用法示例。


在下文中一共展示了LatentDirichletAllocation.score方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: lda_tuner

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def _evaluate_lda(X, topics, dtp, twp):
    """Fit one LDA model on a random half of ``X`` and evaluate it on the rest.

    Returns a ``(score, perplexity)`` tuple, both computed on the held-out
    half of the split.
    """
    # No random_state is passed, so every candidate gets its own 50/50 split.
    X_train, X_test = train_test_split(X, test_size=0.5)
    # ``n_components`` is the current name of the former ``n_topics`` argument
    # (renamed in scikit-learn 0.19; the old spelling was removed in 0.21).
    lda = LatentDirichletAllocation(n_components=topics,
                                    doc_topic_prior=dtp,
                                    topic_word_prior=twp,
                                    learning_method='batch',
                                    random_state=42,
                                    max_iter=20)
    lda.fit(X_train)
    return lda.score(X_test), lda.perplexity(X_test)


def lda_tuner(ingroup_otu, best_models):
    """Grid-search the LDA document-topic and topic-word priors.

    For every topic count, each (doc_topic_prior, topic_word_prior) pair from
    the hard-coded grids is evaluated, followed by one extra run where both
    priors equal ``1 / n_topics``. Progress is printed after every run.

    Parameters
    ----------
    ingroup_otu : object exposing ``.values``
        Source of the matrix handed to the model (``ingroup_otu.values``).
    best_models : list
        Mutated in place: one dict with keys ``'n'``, ``'dtp'``, ``'twp'``,
        ``'score'`` and ``'perp'`` is appended per evaluated candidate.

    Returns
    -------
    list
        The same ``best_models`` list that was passed in.
    """
    best_score = -np.inf
    dtp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    twp_series = [0.0001, 0.001, 0.01, 0.1, 0.2]
    topic_series = [3]
    X = ingroup_otu.values
    eval_counter = 0

    # The two report lines historically differ in punctuation; both are kept
    # verbatim so downstream log parsing is unaffected.
    grid_report = "#{}: n:{}, dtp:{}, twp:{}, score:{}, perp:{}"
    symmetric_report = "#{}: n:{}, dtp:{}, twp:{}, score:{} perp: {}"

    for topics in topic_series:
        candidates = [(dtp, twp, grid_report)
                      for dtp in dtp_series
                      for twp in twp_series]
        # After the full grid, also try the symmetric 1/n_topics priors.
        candidates.append((1. / topics, 1. / topics, symmetric_report))

        for dtp, twp, report in candidates:
            eval_counter += 1
            this_score, this_perplexity = _evaluate_lda(X, topics, dtp, twp)
            if this_score > best_score:
                best_score = this_score
                print("New Max Likelihood: {}".format(best_score))

            print(report.format(eval_counter, topics, dtp, twp,
                                this_score, this_perplexity))

            best_models.append({'n': topics, 'dtp': dtp, 'twp': twp,
                                'score': this_score, 'perp': this_perplexity})
    return best_models
开发者ID:karoraw1,项目名称:GLM_Wrapper,代码行数:62,代码来源:otu_ts_support.py

示例2: test_lda_score

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def test_lda_score():
    # Test LDA score for both the online and the batch learning methods:
    # a model trained for 10 iterations must score at least as well as one
    # trained for a single iteration on the same data.
    n_components, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        # ``n_components`` is the current name of the former ``n_topics``
        # argument (renamed in scikit-learn 0.19, removed in 0.21).
        lda_1 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_1.fit_transform(X)
        score_1 = lda_1.score(X)

        lda_2.fit_transform(X)
        score_2 = lda_2.score(X)
        assert_greater_equal(score_2, score_1)
开发者ID:andaag,项目名称:scikit-learn,代码行数:17,代码来源:test_online_lda.py

示例3: topicmodel

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def topicmodel( comments ):
    """Group comment texts by their dominant LDA topic.

    LDA models with 2 through 9 topics are fitted on a bag-of-words matrix of
    the comments; the one with the highest ``score`` on that same matrix is
    kept, its top words are printed, and every comment is assigned to the
    topic with the largest weight in its transformed row.

    Parameters
    ----------
    comments : iterable of mappings
        Each item must provide the comment body under the ``'text'`` key.

    Returns
    -------
    collections.defaultdict
        Maps a topic index (``numpy.argmax`` result) to the list of
        UTF-8 encoded comment texts assigned to that topic.

    NOTE(review): the ``.encode('utf8')`` calls date from Python 2. Under
    Python 3 they yield ``bytes`` and the ``" ".join`` below would raise
    ``TypeError``; they are kept because callers may rely on the returned
    type. Confirm the target interpreter before removing them.
    """
    # Raw texts are kept for the returned groups; the vectorized matrix is
    # what the models are trained and scored on.
    raw_texts = [c['text'] for c in comments]

    tf_vectorizer = CountVectorizer(
                max_df=.20,
                min_df=10,
                stop_words = stopwords )
    doc_term = tf_vectorizer.fit_transform( raw_texts )

    ## try k = 2 .. 9 topics and keep only the best-scoring model
    best_model = None
    best_ll = None

    for k in range(2, 10):

        print("Testing {}".format(k))

        # ``n_components`` is the current name of the former ``n_topics``
        # argument (renamed in scikit-learn 0.19, removed in 0.21).
        model = LatentDirichletAllocation(
                    n_components= k ,
                    max_iter=5,
                    learning_method='batch',
                    learning_offset=50.,
                    random_state=0
                )
        model.fit( doc_term )
        ll = model.score( doc_term )

        # ``>=`` so that, on an exact tie, the later model wins, as it did
        # when models were stored in a dict keyed by score.
        if best_model is None or ll >= best_ll:
            best_model = model
            best_ll = ll

    ## for debugging, print the top five words of each chosen topic
    feature_names = tf_vectorizer.get_feature_names()
    for topic_idx, weights in enumerate(best_model.components_):
        print("Topic #%d:" % topic_idx)
        top_words = [feature_names[i].encode('utf8')
                     for i in weights.argsort()[:-5 - 1:-1]]
        print(" ".join( top_words ))
        print("")

    ret = collections.defaultdict( list )

    for i, doc_topics in enumerate( best_model.transform( doc_term ) ):

        dominant = numpy.argmax( doc_topics )
        ret[ dominant ].append( raw_texts[ i ].encode('utf8') )

    return ret
开发者ID:matnel,项目名称:hs-comments-visu,代码行数:61,代码来源:main.py

示例4: test_lda_score_perplexity

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
def test_lda_score_perplexity():
    # The perplexity reported by the model must equal
    # exp(-score / sum of the stored entries of X).
    n_components, X = _build_sparse_mtx()
    model = LatentDirichletAllocation(random_state=0, max_iter=10,
                                      n_components=n_components)
    model.fit(X)

    reported = model.perplexity(X, sub_sampling=False)

    log_likelihood = model.score(X)
    total_count = X.data.sum()
    derived = np.exp(-(log_likelihood / total_count))

    assert_almost_equal(reported, derived)
开发者ID:AlexandreAbraham,项目名称:scikit-learn,代码行数:13,代码来源:test_online_lda.py

示例5: range

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
        test_perplexities = []  # size: (max_iter / valid_iter) * (n_splits)


        # Run int(max_iter / valid_iter) rounds. Each round walks over every
        # (train, test) split once, calls partial_fit on the shared lda_model
        # (defined outside this excerpt) and records score and perplexity.
        # NOTE(review): splited_index is re-iterated every round, so it must be
        # a reusable sequence rather than a one-shot generator - confirm where
        # it is built.
        for i in range(int(max_iter / valid_iter)):
            # Per-round buffers, one entry per split.
            train_s = []
            test_s = []
            train_p = []
            test_p = []

            # The printed label is i * valid_iter + 1.
            print '\ntraining ', i * valid_iter + 1, '-th iteration'

            for train_index, test_index in splited_index:
                train_data, test_data = dataset[train_index], dataset[test_index]
                # The model is updated before measuring, so the metrics below
                # describe the model after this split's partial_fit call.
                lda_model.partial_fit(train_data)

                train_s.append(lda_model.score(train_data))
                test_s.append(lda_model.score(test_data))

                train_p.append(lda_model.perplexity(train_data))
                test_p.append(lda_model.perplexity(test_data))

            # Append this round's per-split lists, so index i of each outer
            # list holds the results of round i.
            train_scores.append(train_s)
            test_scores.append(test_s)
            train_perplexities.append(train_p)
            test_perplexities.append(test_p)

            print "train_scores: ", train_scores[i], " test_scores: ", test_scores[i], " train_perplexities: ", train_perplexities[i], " test_perplexities: ", test_perplexities[i]


        dict_num_topic[str(n_component) + '_topics'] = {
            "max_iter": max_iter, "valid_iter": valid_iter,
开发者ID:FYP-2018,项目名称:Topic-Modeling,代码行数:33,代码来源:cross_vali+converge+exploration_numTopic.py

示例6: LatentDirichletAllocation

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
                                stop_words='english')

# Bag-of-words matrix of the blog article bodies.
tf = tf_vectorizer.fit_transform(blogs.article_body)


# Sweep over candidate topic counts and record, for each one, the model's
# score and perplexity on the same matrix it was fitted on.
lda_eval2 = []

ldaRANGE = [9,10,11,12,13,14,15,16,17,18,19,20,30,40,50,60,70,80,90,100,150,200,300]

for n in ldaRANGE:
    # ``n_components`` is the current name of the former ``n_topics``
    # argument (renamed in scikit-learn 0.19, removed in 0.21).
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online', learning_offset=50.,
                                    random_state=0)
    lda.fit(tf)
    score = lda.score(tf)
    perplexity = lda.perplexity(tf)
    # Single-argument print() emits the same space-separated line on
    # Python 2 and Python 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval2.append({'topics':n,'score':score,'perplexity':perplexity})

for item in lda_eval2:
    print(item)

# Tabular view of the sweep results (columns: perplexity, score, topics).
lda_eval22 = pd.DataFrame(lda_eval2)

# Bare expression: displays the frame in a notebook, no effect in a script.
lda_eval22

import matplotlib.pyplot as plt

lda_eval22
plt.style.use('ggplot')
开发者ID:John-Tate,项目名称:DSI-Capstone,代码行数:33,代码来源:blogsNLP.py

示例7: LatentDirichletAllocation

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
# Topic-model settings. n_samples and n_features are not used in this excerpt.
n_samples = 2000
n_features = 1000
n_topics = 10
n_top_words = 20

# ``n_components`` is the current name of the former ``n_topics`` argument
# (renamed in scikit-learn 0.19, removed in 0.21); the module-level
# ``n_topics`` variable is kept because other code may reference it.
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                learning_method='online', learning_offset=50.,
                                random_state=0)

lda.fit(corpusVect)

tf_feature_names = vectorizer.get_feature_names()
print_top_words(lda, tf_feature_names, n_top_words)


# Bare expressions: the results are only displayed in a notebook and are
# discarded when this runs as a script.
lda.score(corpusVect)
lda.perplexity(corpusVect)

#### Titles

corp2 = dataWeek.title
# NOTE(review): the instance fitted here is discarded and a brand-new
# CleanTextTransformer performs the transform below. That is only equivalent
# if the transformer keeps no fitted state - confirm against its definition.
CleanTextTransformer().fit(corp2)
corpCTT2 = CleanTextTransformer().transform(corp2)

# Re-fits the shared vectorizer on the titles, replacing its earlier vocabulary.
corpCTTvect = vectorizer.fit_transform(corpCTT2)
corpusTitlesVect = pd.DataFrame(corpCTTvect.todense(),columns=vectorizer.get_feature_names())

lda2 = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                learning_method='online', learning_offset=50.,
                                random_state=0)
开发者ID:John-Tate,项目名称:DSI-Capstone,代码行数:32,代码来源:NLP.py

示例8: range

# 需要导入模块: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
# 或者: from sklearn.decomposition.LatentDirichletAllocation import score [as 别名]
# Bag-of-words matrix of the document texts.
X = vectorizer.fit_transform(df.text)
vectorizer.get_feature_names()

# Pass the feature names as a flat list: wrapping them in another list makes
# pandas build a one-level MultiIndex instead of plain column labels.
vect_df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names())
vect_df.shape
vect_df.head()

# Sweep over 1..19 topics and record, for each count, the model's score and
# perplexity on the same data it was fitted on.
lda_range= range(1,20)
lda_eval = []

for n in lda_range:
    # ``n_components`` is the current name of the former ``n_topics``
    # argument (renamed in scikit-learn 0.19, removed in 0.21).
    lda = LatentDirichletAllocation(n_components=n, max_iter=5,
                                    learning_method='online', learning_offset=50.,
                                    random_state=0)
    lda.fit(vect_df)
    score = lda.score(vect_df)
    perplexity = lda.perplexity(vect_df)
    # Single-argument print() emits the same space-separated line on
    # Python 2 and Python 3.
    print("{} {} {}".format(n, score, perplexity))
    lda_eval.append({'topics':n,'score':score,'perplexity':perplexity})

for item in lda_eval:
    print(item)

# Final model with a fixed topic count, fitted using all available cores.
lda = LatentDirichletAllocation(n_components=5, n_jobs=-1)


topics = lda.fit_transform(vect_df)
lda.perplexity(vect_df)
lda.score(vect_df)
# Inspect one document: row 2545 of the topic matrix and the text at the same
# position. ``.iloc`` replaces ``.ix`` (removed in pandas 1.0) and is
# positional, matching how ``topics`` is indexed.
topics[2545]
df.iloc[2545].text
开发者ID:cl65610,项目名称:lincolNLP,代码行数:33,代码来源:lincoln_topics.py


注:本文中的sklearn.decomposition.LatentDirichletAllocation.score方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。