

Python LatentDirichletAllocation.fit_transform Method Code Examples

This article collects and summarizes typical usage examples of the sklearn.decomposition.LatentDirichletAllocation.fit_transform method in Python. If you have been wondering how to use LatentDirichletAllocation.fit_transform, or looking for concrete examples of it in practice, the curated code samples below may help. You can also explore further usage examples of the class it belongs to, sklearn.decomposition.LatentDirichletAllocation.


A total of 15 code examples of the LatentDirichletAllocation.fit_transform method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
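
Before turning to the collected examples, here is a minimal, self-contained sketch of the typical fit_transform workflow: vectorize raw text into token counts, then fit an LDA model and read off the document-topic distribution. The tiny corpus, the topic count, and all variable names are illustrative assumptions rather than code from any of the projects below, and the snippet targets scikit-learn releases that use the n_components parameter (older releases, as in several examples below, used n_topics).

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Tiny illustrative corpus (an assumption, not taken from the projects below)
docs = [
    "the cat sat on the mat",
    "cats and dogs are popular pets",
    "stock markets fell sharply today",
    "investors worry about market volatility",
]

# LDA expects token counts, so use CountVectorizer rather than tf-idf
cv = CountVectorizer(stop_words='english')
X = cv.fit_transform(docs)

# Fit the model and get the document-topic matrix in one call
lda = LatentDirichletAllocation(n_components=2, random_state=0)
doc_topic = lda.fit_transform(X)   # shape: (n_documents, n_components)

# Each row is a topic distribution over the document and sums to 1
print(doc_topic.round(3))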

Example 1: test_lda_default_prior_params

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_default_prior_params():
    # default prior parameters should be `1. / n_topics`
    # and verbose params should not affect result
    n_topics, X = _build_sparse_mtx()
    prior = 1. / n_topics
    lda_1 = LatentDirichletAllocation(n_topics=n_topics, doc_topic_prior=prior,
                                      topic_word_prior=prior, random_state=0)
    lda_2 = LatentDirichletAllocation(n_topics=n_topics, random_state=0)

    topic_distr_1 = lda_1.fit_transform(X)
    topic_distr_2 = lda_2.fit_transform(X)
    assert_almost_equal(topic_distr_1, topic_distr_2)
Developer: rsteca, Project: scikit-learn, Lines: 14, Source: test_online_lda.py

Example 2: test_lda_score

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_score():
    # Test LDA score for both online and batch training
    # score should be higher with more iterations
    n_topics, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_1.fit_transform(X)
        score_1 = lda_1.score(X)

        lda_2.fit_transform(X)
        score_2 = lda_2.score(X)
        assert_greater_equal(score_2, score_1)
Developer: andaag, Project: scikit-learn, Lines: 17, Source: test_online_lda.py

Example 3: basic_lda

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def basic_lda(df, n_topics=200, max_df=0.5, min_df=5):
    '''
    Basic LDA model for album recommendations

    Args:
        df: dataframe with Pitchfork reviews
        n_topics: number of lda topics
        max_df: max_df in CountVectorizer
        min_df: min_df in CountVectorizer
    Returns:
        cv: sklearn fitted CountVectorizer
        cv_trans: sparse matrix with count-vectorized data
        lda: sklearn fitted LatentDirichletAllocation
        lda_trans: dense array with lda transformed data

    '''

    X = df['review']
    cv = CountVectorizer(stop_words='english',
                         min_df=min_df,
                         max_df=max_df)
    cv_trans = cv.fit_transform(X)

    lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=7)
    lda_trans = lda.fit_transform(cv_trans)

    return cv, cv_trans, lda, lda_trans
Developer: lwoloszy, Project: albumpitch, Lines: 29, Source: eda.py

Example 4: _get_model_LDA

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
    def _get_model_LDA(self, corpus):
        # lda = models.LdaModel(corpus, id2word=self.corpus.dictionary, num_topics=5, alpha='auto', eval_every=50)
        lda = LatentDirichletAllocation(n_topics=self.num_of_clusters, max_iter=20,
                                        learning_method='online',
                                        learning_offset=50.,
                                        random_state=1)
        return lda.fit_transform(corpus)
Developer: AnastasiaProk, Project: ws2018_forum_analyzer, Lines: 9, Source: cluster.py

Example 5: produceLDATopics

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def produceLDATopics():
    '''
    Takes description of each game and uses sklearn's latent dirichlet allocation and count vectorizer
    to extract topics.
    :return: pandas data frame with topic weights for each game (rows) and topic (columns)
    '''
    data_samples, gameNames = create_game_profile_df(game_path)
    tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=n_features, stop_words='english')
    tf = tf_vectorizer.fit_transform(data_samples)
    lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=5,
                                    learning_method='online', learning_offset=50.,
                                    random_state=0)
    topics = lda.fit_transform(tf)
    # for i in range(50):
    #     gameTopics = []
    #     for j in range(len(topics[0])):
    #         if topics[i,j] > 1.0/float(n_topics):
    #             gameTopics.append(j)
    #     print gameNames[i], gameTopics
    topicsByGame = pandas.DataFrame(topics)
    topicsByGame.index = gameNames
    print(topicsByGame)

    tf_feature_names = tf_vectorizer.get_feature_names()
    for topic_idx, topic in enumerate(lda.components_):
        print("Topic #%d:" % topic_idx)
        print(" ".join([tf_feature_names[i]
                        for i in topic.argsort()[:-n_top_words - 1:-1]]))

    return topicsByGame
Developer: USF-ML2, Project: Steamed_Up, Lines: 32, Source: gameRec_getLDAtopics.py

Example 6: score_lda

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def score_lda(src, dst):
	##read sentence pairs to two lists
	b1 = []
	b2 = []
	lines = 0
	with open(src) as p:
		for i, line in enumerate(p):
			s = line.split('\t')
			b1.append(s[0])
			b2.append(s[1][:-1]) #remove \n
			lines = i + 1

	vectorizer = CountVectorizer()
	vectors=vectorizer.fit_transform(b1 + b2)

	lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=5,
                                learning_method='online', learning_offset=50.,
                                random_state=0)
	X = lda.fit_transform(vectors)
	print(X.shape)
	b1_v = vectorizer.transform(b1)
	b2_v = vectorizer.transform(b2)
	b1_vecs = lda.transform(b1_v)
	b2_vecs = lda.transform(b2_v)

	res = [round(5*(1 - spatial.distance.cosine(b1_vecs[i], b2_vecs[i])),2) for i in range(lines)]
	with open(dst, 'w') as thefile:
		thefile.write("\n".join(str(i) for i in res))
Developer: wintor12, Project: SemEval2015, Lines: 30, Source: run.py

Example 7: get_features

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
    def get_features(vocab):
        vectorizer_head = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_head = vectorizer_head.fit_transform(headlines)

        vectorizer_body = TfidfVectorizer(vocabulary=vocab, use_idf=False, norm='l2')
        X_train_body = vectorizer_body.fit_transform(bodies)

        # Calculates the n most important topics of the bodies. Each topic contains all words, ordered by importance.
        # The more important words of a given topic a body contains, the higher the body's value for that topic.
        lda_body = LatentDirichletAllocation(n_topics=n_topics, learning_method='online', random_state=0, n_jobs=3)

        print("latent_dirichlet_allocation_cos: fit and transform body")
        t0 = time()
        lda_body_matrix = lda_body.fit_transform(X_train_body)
        print("done in %0.3fs." % (time() - t0))

        print("latent_dirichlet_allocation_cos: transform head")
        # use the lda trained on body topics for the headlines => if the headlines and bodies share topics,
        # their vectors should be similar
        lda_head_matrix = lda_body.transform(X_train_head)

        #print_top_words(lda_body, vectorizer_body.get_feature_names(), 100)

        print('latent_dirichlet_allocation_cos: calculating cosine distance between head and body')
        # calculate cosine distance between the body and head
        X = []
        for i in range(len(lda_head_matrix)):
            X_head_vector = np.array(lda_head_matrix[i]).reshape((1, -1)) #1d array is deprecated
            X_body_vector = np.array(lda_body_matrix[i]).reshape((1, -1))
            cos_dist = cosine_distances(X_head_vector, X_body_vector).flatten()
            X.append(cos_dist.tolist())
        return X
Developer: paris5020, Project: athene_system, Lines: 34, Source: topic_models.py

Example 8: latdirall

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def latdirall(content):
    lda = LatentDirichletAllocation(n_topics=10)
    tf_vectorizer = TfidfVectorizer(max_df=0.99, min_df=1,
                                stop_words='english')
    tf = tf_vectorizer.fit_transform(content)
    lolz = lda.fit_transform(tf)
    tfidf_feature_names = tf_vectorizer.get_feature_names()
    return top_topics(lda, tfidf_feature_names, 10)
Developer: nowittynamesleft, Project: Machine-Learning, Lines: 10, Source: bagofwords.py

Example 9: test_lda_transform

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_transform():
    # Test LDA transform.
    # Transform result cannot be negative
    rng = np.random.RandomState(0)
    X = rng.randint(5, size=(20, 10))
    n_topics = 3
    lda = LatentDirichletAllocation(n_topics=n_topics, random_state=rng)
    X_trans = lda.fit_transform(X)
    assert_true((X_trans > 0.0).any())
Developer: rsteca, Project: scikit-learn, Lines: 11, Source: test_online_lda.py

Example 10: test_lda_perplexity

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_perplexity():
    # Test LDA perplexity for both online and batch training
    # perplexity should be lower with more iterations
    n_topics, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        distr_1 = lda_1.fit_transform(X)
        perp_1 = lda_1.perplexity(X, distr_1, sub_sampling=False)

        distr_2 = lda_2.fit_transform(X)
        perp_2 = lda_2.perplexity(X, distr_2, sub_sampling=False)
        assert_greater_equal(perp_1, perp_2)

        perp_1_subsampling = lda_1.perplexity(X, distr_1, sub_sampling=True)
        perp_2_subsampling = lda_2.perplexity(X, distr_2, sub_sampling=True)
        assert_greater_equal(perp_1_subsampling, perp_2_subsampling)
Developer: andaag, Project: scikit-learn, Lines: 21, Source: test_online_lda.py

Example 11: test_lda_transform

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_transform():
    # Test LDA transform.
    # Transform result cannot be negative and should be normalized
    rng = np.random.RandomState(0)
    X = rng.randint(5, size=(20, 10))
    n_topics = 3
    lda = LatentDirichletAllocation(n_topics=n_topics, random_state=rng)
    X_trans = lda.fit_transform(X)
    assert_true((X_trans > 0.0).any())
    assert_array_almost_equal(np.sum(X_trans, axis=1), np.ones(X_trans.shape[0]))
Developer: Allenw3u, Project: scikit-learn, Lines: 12, Source: test_online_lda.py

Example 12: test_lda_fit_transform

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_fit_transform(method):
    # Test LDA fit_transform & transform
    # fit_transform and transform result should be the same
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(50, 20))
    lda = LatentDirichletAllocation(n_components=5, learning_method=method,
                                    random_state=rng)
    X_fit = lda.fit_transform(X)
    X_trans = lda.transform(X)
    assert_array_almost_equal(X_fit, X_trans, 4)
Developer: AlexisMignon, Project: scikit-learn, Lines: 12, Source: test_online_lda.py

Example 13: test_doc_topic_distr_deprecation

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_doc_topic_distr_deprecation():
    # Test that the appropriate warning message is displayed when a user
    # attempts to pass the doc_topic_distr argument to the perplexity method
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components, max_iter=1,
                                    learning_method='batch',
                                    total_samples=100, random_state=0)
    distr1 = lda.fit_transform(X)
    distr2 = None
    assert_warns(DeprecationWarning, lda.perplexity, X, distr1)
    assert_warns(DeprecationWarning, lda.perplexity, X, distr2)
Developer: AlexandreAbraham, Project: scikit-learn, Lines: 13, Source: test_online_lda.py

Example 14: test_lda_score_perplexity

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_lda_score_perplexity():
    # Test the relationship between LDA score and perplexity
    n_topics, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=10,
                                    random_state=0)
    distr = lda.fit_transform(X)
    perplexity_1 = lda.perplexity(X, distr, sub_sampling=False)

    score = lda.score(X)
    perplexity_2 = np.exp(-1. * (score / np.sum(X.data)))
    assert_almost_equal(perplexity_1, perplexity_2)
Developer: rsteca, Project: scikit-learn, Lines: 13, Source: test_online_lda.py
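
The check in Example 14 mirrors the relationship the test exercises: perplexity is the exponentiated negative (approximate) log-likelihood per word. Writing score(X) for the value returned by lda.score(X) and N = np.sum(X.data) for the total token count in the corpus, the snippet above verifies

    perplexity(X) = exp(-score(X) / N)

so a higher score corresponds directly to a lower perplexity.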

Example 15: test_perplexity_input_format

# Required import: from sklearn.decomposition import LatentDirichletAllocation [as alias]
# Or: from sklearn.decomposition.LatentDirichletAllocation import fit_transform [as alias]
def test_perplexity_input_format():
    # Test LDA perplexity for sparse and dense input
    # score should be the same for both dense and sparse input
    n_topics, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, learning_method='batch',
                                    total_samples=100, random_state=0)
    distr = lda.fit_transform(X)
    perp_1 = lda.perplexity(X)
    perp_2 = lda.perplexity(X, distr)
    perp_3 = lda.perplexity(X.toarray(), distr)
    assert_almost_equal(perp_1, perp_2)
    assert_almost_equal(perp_1, perp_3)
Developer: andaag, Project: scikit-learn, Lines: 14, Source: test_online_lda.py


Note: The sklearn.decomposition.LatentDirichletAllocation.fit_transform method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers, and the copyright of the source code belongs to the original authors. Please consult the corresponding project's license before distributing or using the code; do not reproduce without permission.