Python decomposition.LatentDirichletAllocation类代码示例

本文整理汇总了Python中sklearn.decomposition.LatentDirichletAllocation类的典型用法代码示例。如果您正苦于以下问题：Python decomposition.LatentDirichletAllocation类的具体用法？Python decomposition.LatentDirichletAllocation怎么用？Python decomposition.LatentDirichletAllocation使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.decomposition的用法示例。

在下文中一共展示了decomposition.LatentDirichletAllocation类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_lda_preplexity_mismatch

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_preplexity_mismatch():
    """Check that ``_perplexity_precomp_distr`` rejects mis-shaped input.

    A model is fitted on a small random count matrix. The private
    perplexity helper is then called twice: once with a matrix that has one
    row more than ``X`` and once with a matrix that has one column more than
    ``n_components``. Each call must raise ``ValueError`` with the matching
    message.
    """
    # Every random draw comes from the same seeded generator so that the
    # test data is identical from run to run.
    rng = np.random.RandomState(0)
    n_components = rng.randint(3, 6)
    n_samples = rng.randint(6, 10)
    X = rng.randint(4, size=(n_samples, 10))
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=5., total_samples=20,
                                    random_state=rng)
    lda.fit(X)

    # One row too many compared with X
    invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_components))
    assert_raises_regexp(ValueError, r'Number of samples',
                         lda._perplexity_precomp_distr, X, invalid_n_samples)

    # One column too many compared with n_components
    invalid_n_components = rng.randint(4, size=(n_samples, n_components + 1))
    assert_raises_regexp(ValueError, r'Number of topics',
                         lda._perplexity_precomp_distr, X,
                         invalid_n_components)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:21，代码来源:test_online_lda.py

示例2: test_lda_perplexity

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_perplexity(method):
    """Perplexity must not go up when LDA is trained for more iterations.

    Two models that differ only in ``max_iter`` (1 vs. 10) are fitted on the
    same matrix with the given learning ``method``. The comparison is made
    with ``sub_sampling`` disabled and then enabled.
    """
    n_components, X = _build_sparse_mtx()
    shared_params = dict(n_components=n_components, learning_method=method,
                         total_samples=100, random_state=0)
    short_run = LatentDirichletAllocation(max_iter=1, **shared_params)
    long_run = LatentDirichletAllocation(max_iter=10, **shared_params)

    short_run.fit(X)
    short_perplexity = short_run.perplexity(X, sub_sampling=False)

    long_run.fit(X)
    long_perplexity = long_run.perplexity(X, sub_sampling=False)
    assert_greater_equal(short_perplexity, long_perplexity)

    # Same comparison with sub-sampling switched on
    assert_greater_equal(short_run.perplexity(X, sub_sampling=True),
                         long_run.perplexity(X, sub_sampling=True))

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:22，代码来源:test_online_lda.py

示例3: test_lda_score

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_score(method):
    """Score must not go down when LDA is trained for more iterations.

    Two models that differ only in ``max_iter`` (1 vs. 10) are fitted with
    the given learning ``method`` and their ``score`` on the training matrix
    is compared.
    """
    n_components, X = _build_sparse_mtx()
    shared_params = dict(n_components=n_components, learning_method=method,
                         total_samples=100, random_state=0)
    short_run = LatentDirichletAllocation(max_iter=1, **shared_params)
    long_run = LatentDirichletAllocation(max_iter=10, **shared_params)

    short_run.fit_transform(X)
    short_score = short_run.score(X)

    long_run.fit_transform(X)
    long_score = long_run.score(X)

    assert_greater_equal(long_score, short_score)

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:18，代码来源:test_online_lda.py

示例4: test_lda_fit_perplexity

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_fit_perplexity():
    """``bound_`` after ``fit`` must agree with ``perplexity`` on the train set.

    The model is fitted for one batch iteration with ``evaluate_every=1``,
    then the stored ``bound_`` attribute is compared with the value returned
    by ``perplexity(X)``.
    """
    n_components, X = _build_sparse_mtx()
    model = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=1,
        learning_method='batch',
        random_state=0,
        evaluate_every=1,
    )
    model.fit(X)

    # Left: value recorded at the end of fit; right: value from the method.
    assert_almost_equal(model.bound_, model.perplexity(X))

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:18，代码来源:test_online_lda.py

示例5: check_verbosity

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def check_verbosity(verbose, evaluate_every, expected_lines,
                    expected_perplexities):
    """Fit an LDA model and check how much it printed to stdout.

    ``sys.stdout`` is swapped for an in-memory buffer while the model is
    fitted. Afterwards the number of newlines and the number of occurrences
    of the word ``perplexity`` in the captured text are compared with
    ``expected_lines`` and ``expected_perplexities``.
    """
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=3,
        learning_method='batch',
        verbose=verbose,
        evaluate_every=evaluate_every,
        random_state=0,
    )

    captured = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = captured
    try:
        lda.fit(X)
    finally:
        # Always restore the real stdout, even if fit raises
        sys.stdout = saved_stdout

    printed = captured.getvalue()
    assert_equal(expected_lines, printed.count('\n'))
    assert_equal(expected_perplexities, printed.count('perplexity'))

开发者ID:PacktPublishing，项目名称:Mastering-Elasticsearch-7.0，代码行数:21，代码来源:test_online_lda.py

示例6: word2vec

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def word2vec(word_list, n_features=1000, topics=5):
    """Vectorize documents with term counts and fit an LDA topic model.

    Parameters
    ----------
    word_list : iterable of documents passed to ``CountVectorizer``.
    n_features : maximum vocabulary size (``max_features``).
    topics : number of LDA topics (``n_components``).

    Returns
    -------
    tuple ``(lda, tf, tf_feature_names, tf_vectorizer)``: the fitted model,
    the document-term matrix, the vocabulary as a list, and the fitted
    vectorizer.
    """
    # No stop-word list is passed (stop_words='english' is disabled).
    tf_vectorizer = CountVectorizer(strip_accents='unicode',
                                    max_features=n_features,
                                    max_df=0.5,
                                    min_df=10)
    tf = tf_vectorizer.fit_transform(word_list)

    # 'batch' learning: a good fit when the sample is small and the model is
    # only used for exploration, since it leaves fewer parameters to tune.
    lda = LatentDirichletAllocation(n_components=topics,
                                    learning_method='batch')
    # Train the model with variational Bayes
    lda.fit(tf)

    # Vocabulary used to print the keywords of each topic.
    # get_feature_names() is gone in newer scikit-learn releases; prefer
    # get_feature_names_out() when it exists and keep returning a list.
    if hasattr(tf_vectorizer, 'get_feature_names_out'):
        tf_feature_names = list(tf_vectorizer.get_feature_names_out())
    else:
        tf_feature_names = tf_vectorizer.get_feature_names()

    return lda, tf, tf_feature_names, tf_vectorizer

#将主题以可视化结果展现出来

开发者ID:starFalll，项目名称:Spider，代码行数:22，代码来源:LDA_Analysis.py

示例7: init

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def __init__(self, n_topics=50, estimator='LDA'):
        """
        Build a topic-modelling pipeline.

        n_topics is the desired number of topics.
        To use Latent Semantic Analysis, set estimator to 'LSA',
        To use Non-Negative Matrix Factorization, set estimator to 'NMF',
        otherwise, defaults to Latent Dirichlet Allocation ('LDA').
        """
        self.n_topics = n_topics

        if estimator == 'LSA':
            self.estimator = TruncatedSVD(n_components=self.n_topics)
        elif estimator == 'NMF':
            self.estimator = NMF(n_components=self.n_topics)
        else:
            # LatentDirichletAllocation takes the topic count as
            # `n_components`, like the other two estimators; the old
            # `n_topics` keyword is no longer accepted by scikit-learn.
            self.estimator = LatentDirichletAllocation(
                n_components=self.n_topics)

        # NOTE: the step is named 'tfidf' but holds a CountVectorizer; the
        # name is kept because callers may look the step up by it.
        self.model = Pipeline([
            ('norm', TextNormalizer()),
            ('tfidf', CountVectorizer(tokenizer=identity,
                                      preprocessor=None, lowercase=False)),
            ('model', self.estimator)
        ])

开发者ID:foxbook，项目名称:atap，代码行数:24，代码来源:topics.py

示例8: train_lda

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def train_lda(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    """Fit an online LDA model on ``corpus`` and wrap the result in ``TOPIC``.

    Parameters
    ----------
    corpus : mutable sequence of documents. When ``cleaning`` is not None
        every element is replaced in place by ``cleaning(element)``.
    n_topics : number of topics to learn.
    max_df, min_df, stop_words : forwarded to ``CountVectorizer``.
    cleaning : callable applied to each document, or None to skip cleaning.

    Returns
    -------
    ``TOPIC(tf_features, lda)`` built from the vocabulary list and the
    fitted model.
    """
    if cleaning is not None:
        # In-place update is kept on purpose: callers see the cleaned corpus.
        for i, document in enumerate(corpus):
            corpus[i] = cleaning(document)

    tf_vectorizer = CountVectorizer(
        max_df=max_df,
        min_df=min_df,
        stop_words=stop_words)
    tf = tf_vectorizer.fit_transform(corpus)

    # get_feature_names() is gone in newer scikit-learn releases; prefer
    # get_feature_names_out() when it exists and keep passing a list on.
    if hasattr(tf_vectorizer, 'get_feature_names_out'):
        tf_features = list(tf_vectorizer.get_feature_names_out())
    else:
        tf_features = tf_vectorizer.get_feature_names()

    # The estimator's keyword is `n_components`; `n_topics` is no longer
    # accepted by scikit-learn.
    lda = LatentDirichletAllocation(
        n_components=n_topics,
        max_iter=5,
        learning_method='online',
        learning_offset=50.,
        random_state=0).fit(tf)
    return TOPIC(tf_features, lda)

开发者ID:huseinzol05，项目名称:Python-DevOps，代码行数:20，代码来源:topic.py

示例9: test_objectmapper

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_objectmapper(self):
        """Each attribute on ``df.decomposition`` must be the very same object
        as the attribute of that name on the ``decomposition`` module."""
        df = pdml.ModelFrame([])

        # Checked in this order; the first mismatch fails the test.
        mapped_names = (
            'PCA',
            'IncrementalPCA',
            'KernelPCA',
            'FactorAnalysis',
            'FastICA',
            'TruncatedSVD',
            'NMF',
            'SparsePCA',
            'MiniBatchSparsePCA',
            'SparseCoder',
            'DictionaryLearning',
            'MiniBatchDictionaryLearning',
            'LatentDirichletAllocation',
        )
        for attr in mapped_names:
            self.assertIs(getattr(df.decomposition, attr),
                          getattr(decomposition, attr))

开发者ID:pandas-ml，项目名称:pandas-ml，代码行数:24，代码来源:test_decomposition.py

示例10: test_lda_perplexity

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_perplexity():
    """Perplexity must not go up when LDA is trained for more iterations.

    For both the 'online' and the 'batch' learning method, a model trained
    for 1 iteration is compared with one trained for 10 iterations, with
    ``sub_sampling`` disabled and then enabled.
    """
    n_components, X = _build_sparse_mtx()
    for learning_method in ('online', 'batch'):
        shared_params = dict(n_components=n_components,
                             learning_method=learning_method,
                             total_samples=100, random_state=0)
        short_run = LatentDirichletAllocation(max_iter=1, **shared_params)
        long_run = LatentDirichletAllocation(max_iter=10, **shared_params)

        short_run.fit(X)
        short_perplexity = short_run.perplexity(X, sub_sampling=False)

        long_run.fit(X)
        long_perplexity = long_run.perplexity(X, sub_sampling=False)
        assert_greater_equal(short_perplexity, long_perplexity)

        # Same comparison with sub-sampling switched on
        assert_greater_equal(short_run.perplexity(X, sub_sampling=True),
                             long_run.perplexity(X, sub_sampling=True))

开发者ID:alvarobartt，项目名称:twitter-stock-recommendation，代码行数:23，代码来源:test_online_lda.py

示例11: test_lda_score

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_lda_score():
    """Score must not go down when LDA is trained for more iterations.

    For both the 'online' and the 'batch' learning method, the ``score`` of a
    model trained for 10 iterations is compared with one trained for 1.
    """
    n_components, X = _build_sparse_mtx()
    for learning_method in ('online', 'batch'):
        shared_params = dict(n_components=n_components,
                             learning_method=learning_method,
                             total_samples=100, random_state=0)
        short_run = LatentDirichletAllocation(max_iter=1, **shared_params)
        long_run = LatentDirichletAllocation(max_iter=10, **shared_params)

        short_run.fit_transform(X)
        short_score = short_run.score(X)

        long_run.fit_transform(X)
        long_score = long_run.score(X)

        assert_greater_equal(long_score, short_score)

开发者ID:alvarobartt，项目名称:twitter-stock-recommendation，代码行数:19，代码来源:test_online_lda.py

示例12: test_evaluation_sklearn_all_metrics

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_evaluation_sklearn_all_metrics():
    """Run ``tm_sklearn.evaluate_topic_models`` with every available metric.

    Three parameter sets (``n_components`` = 2, 3, 4) are evaluated on
    ``EVALUATION_TEST_DTM``. For each result the parameter names, the set of
    returned metric keys and the value range of each metric are checked, and
    the returned model must be a ``LatentDirichletAllocation`` instance.
    """
    passed_params = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs', 'random_state'}
    varying_params = [dict(n_components=k) for k in range(2, 5)]
    const_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1, random_state=1)

    evaluate_topic_models_kwargs = dict(
        metric=tm_sklearn.AVAILABLE_METRICS,
        held_out_documents_wallach09_n_samples=10,
        held_out_documents_wallach09_n_folds=2,
        coherence_gensim_vocab=EVALUATION_TEST_VOCAB,
        coherence_gensim_texts=EVALUATION_TEST_TOKENS,
        return_models=True,
    )

    eval_res = tm_sklearn.evaluate_topic_models(EVALUATION_TEST_DTM, varying_params, const_params,
                                                **evaluate_topic_models_kwargs)

    assert len(eval_res) == len(varying_params)

    for param_set, metric_results in eval_res:
        assert set(param_set.keys()) == passed_params
        assert set(metric_results.keys()) == set(tm_sklearn.AVAILABLE_METRICS + ('model',))

        assert metric_results['perplexity'] > 0
        assert 0 <= metric_results['cao_juan_2009'] <= 1
        assert 0 <= metric_results['arun_2010']
        assert metric_results['coherence_mimno_2011'] < 0
        assert np.isclose(metric_results['coherence_gensim_u_mass'], metric_results['coherence_mimno_2011'])
        assert 0 <= metric_results['coherence_gensim_c_v'] <= 1
        assert metric_results['coherence_gensim_c_uci'] < 0
        assert metric_results['coherence_gensim_c_npmi'] < 0

        # The result keys were just asserted to equal tm_sklearn's metric
        # list, so the optional metric must be looked up in that same list;
        # guarding on another module's list could index a missing key.
        if 'held_out_documents_wallach09' in tm_sklearn.AVAILABLE_METRICS:  # only if gmpy2 is installed
            assert metric_results['held_out_documents_wallach09'] < 0

        assert isinstance(metric_results['model'], LatentDirichletAllocation)

开发者ID:WZBSocialScienceCenter，项目名称:tmtoolkit，代码行数:38，代码来源:test_topicmod_evaluate.py

示例13: test_compute_models_parallel_sklearn

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def test_compute_models_parallel_sklearn():
    """Check the output of ``tm_sklearn.compute_models_parallel``.

    One model is requested per value of ``n_components`` in 2..4. Each
    returned pair must carry the full parameter-name set, a
    ``LatentDirichletAllocation`` instance, and an ndarray ``components_``.
    """
    const_params = {
        'learning_method': 'batch',
        'evaluate_every': 1,
        'max_iter': 3,
        'n_jobs': 1,
    }
    varying_params = [{'n_components': k} for k in (2, 3, 4)]
    expected_param_names = {'n_components', 'learning_method',
                            'evaluate_every', 'max_iter', 'n_jobs'}

    models = tm_sklearn.compute_models_parallel(
        EVALUATION_TEST_DTM, varying_params, const_params)

    assert len(models) == len(varying_params)

    for used_params, fitted in models:
        assert set(used_params.keys()) == expected_param_names
        assert isinstance(fitted, LatentDirichletAllocation)
        assert isinstance(fitted.components_, np.ndarray)

开发者ID:WZBSocialScienceCenter，项目名称:tmtoolkit，代码行数:15，代码来源:test_topicmod_evaluate.py

示例14: fitTopicModel

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def fitTopicModel(self, numTopics, max_iter=100, **kwargs):
        """Create ``self.lda`` and fit it on ``self.fragM``.

        numTopics is the number of topics to learn, max_iter the maximum
        number of iterations; any extra keyword arguments are forwarded to
        ``LatentDirichletAllocation``. The learning method, seed and batch
        size are read from ``self.learningMethod``, ``self.seed`` and
        ``self.chunksize``.
        """
        # The estimator's keyword is `n_components`; `n_topics` is no longer
        # accepted by scikit-learn.
        self.lda = LatentDirichletAllocation(n_components=numTopics,
                                             learning_method=self.learningMethod,
                                             random_state=self.seed,
                                             n_jobs=1, max_iter=max_iter,
                                             batch_size=self.chunksize,
                                             **kwargs)
        if self.fragM.shape[0] > self.chunksize:
            # More rows than one chunk: fit the model in chunks
            self.lda.learning_method = 'online'
        self.lda.fit(self.fragM)

开发者ID:rdkit，项目名称:CheTo，代码行数:12，代码来源:chemTopicModel.py

示例15: learn_topics

# 需要导入模块: from sklearn import decomposition [as 别名]
# 或者: from sklearn.decomposition import LatentDirichletAllocation [as 别名]
def learn_topics(X, X_dev, K=50):
    """Fit an online LDA model with ``K`` topics and evaluate it on ``X_dev``.

    Parameters
    ----------
    X : training matrix passed to ``fit``.
    X_dev : held-out matrix passed to ``perplexity``.
    K : number of topics (``n_components``).

    Returns
    -------
    tuple ``(score, lda, topics)`` where ``score`` is the value returned by
    ``lda.perplexity(X_dev)``, ``lda`` is the fitted model and ``topics`` is
    ``lda.components_``.
    """
    lda = LatentDirichletAllocation(n_components=K, learning_method='online', verbose=1)
    print("Fitting", K, "topics...")
    lda.fit(X)
    score = lda.perplexity(X_dev)
    # The value comes from perplexity(), so it is labelled as such.
    print("Perplexity:", score)
    topics = lda.components_
    return score, lda, topics

开发者ID:blei-lab，项目名称:causal-text-embeddings，代码行数:10，代码来源:helpers.py

注：本文中的sklearn.decomposition.LatentDirichletAllocation方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。