

Python LdaModel.print_topics Method Code Examples

This article collects typical usage examples of the Python method gensim.models.ldamodel.LdaModel.print_topics, gathered from open-source projects. If you are unsure how or when to call LdaModel.print_topics, the curated examples below should help. You can also explore further usage examples of the containing class, gensim.models.ldamodel.LdaModel.


The following presents 8 code examples of LdaModel.print_topics, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
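For orientation, here is a minimal, self-contained sketch (toy corpus, illustrative values only) of what print_topics returns in current gensim versions: a list of (topic_id, "weight*word + ...") tuples describing the most significant topics.

from gensim import corpora
from gensim.models.ldamodel import LdaModel

# toy corpus: three tiny "documents"
texts = [["human", "computer", "interface"],
         ["graph", "trees", "minors"],
         ["human", "graph", "computer"]]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)
# print_topics(num_topics, num_words) returns the most significant topics
# as (topic_id, "weight*word + ...") tuples
for topic_id, topic in lda.print_topics(num_topics=2, num_words=3):
    print(topic_id, topic)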

Example 1: get_topics

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
def get_topics(candidate, day):
    # '%s' (seconds since the epoch) is a platform-specific strftime code; *1000 converts to ms
    start_time = datetime.strptime(day, "%Y-%m-%d").date()
    start_time = int(start_time.strftime('%s')) * 1000
    end_time = start_time + 86399999  # last millisecond of the same day
    try:
        client = MongoClient()
        tweets = client.fletcher.tweets
        tweets = tweets.aggregate([
            {"$match":{"$text":{"$search":candidate_search[candidate_slugs[candidate]]}}},
            {"$match":{"timestamp_ms":{"$gte":start_time,"$lt":end_time}}}])
        # keep only ASCII letters and spaces in each tweet
        documents = []
        pattern = re.compile("[^a-zA-Z ]")
        for tweet in tweets:
            documents.append(pattern.sub('', tweet['text']))
        stoplist = set(candidate_stop_words[candidate_slugs[candidate]] + stopwords)
        texts = [[word for word in document.lower().split() if word not in stoplist]
                for document in documents]
        # drop tokens that occur only once in the corpus
        frequency = defaultdict(int)
        for text in texts:
            for token in text:
                frequency[token] += 1
        texts = [[token for token in text if frequency[token] > 1]
                for text in texts]
        dictionary = corpora.Dictionary(texts)
        corpus = [dictionary.doc2bow(text) for text in texts]
        lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=3, update_every=1, chunksize=10000, passes=10)
        return lda.print_topics(3)
    except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
        return None
Author: GeorgeMcIntire, Project: politweets, Lines: 31, Source: twitter_functions.py

Example 2: lda_topic_model

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
def lda_topic_model(data, is_clean=False, num_of_topics=10, num_of_pass=5):
    """Run LDA topic modelling on the given dataset.

    Args:
        data: a list of documents, or pre-tokenized lists of words
        is_clean: set True if the data has already been pre-processed
        num_of_topics: number of topics the LDA model should generate
        num_of_pass: more passes make the model more accurate, but many
                     passes can be slow on a very large corpus
    """
    if not is_clean:
        stops = set(nltk.corpus.stopwords.words("english"))
        texts = prepare_for_lda(data, stops)
    else:
        texts = data
    dictionary = corpora.Dictionary(texts)
    print(dictionary)
    corpus = [dictionary.doc2bow(text) for text in texts]
    ldamodel = LdaModel(corpus, id2word=dictionary, num_topics=num_of_topics,
                        passes=num_of_pass)
    return ldamodel.print_topics(num_topics=num_of_topics, num_words=10)
Author: IamTao, Project: An-PUB-Engine, Lines: 24, Source: doc_features.py
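
For reference, a hypothetical call to the lda_topic_model function above might look like the sketch below; the sample documents are invented, and the call assumes the module's prepare_for_lda helper and the NLTK stopword corpus are available.

docs = ["the cat sat on the mat",
        "dogs and cats make friendly pets"]
# prints the dictionary, then returns the top 10 words for each of the 2 topics
for topic in lda_topic_model(docs, num_of_topics=2, num_of_pass=5):
    print(topic)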

Example 3: analyze_speeches

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
def analyze_speeches(filename="1.txt"):
    """Run LDA over all speech files and print topics and top words.

    Args:
    filename - default speech file; it is overwritten in the loop below,
               which reads every speech in GC.SPEECH_FOLDER
    """
    dictionary = corpora.dictionary.Dictionary()
    train_documents = list()
    all_words = list()
    for i in range(1, GC.N_SPEECHES):
        filename = path_join(GC.SPEECH_FOLDER, str(i) + ".txt")
        with open(filename, "r", encoding="utf8") as speech_file:
            speech_words = list()
            for line in speech_file:
                words = line.strip().split()
                words = [word for word in words if valid_word(word)]
                words = " ".join(map(unidecode, words))
                # strip punctuation characters
                output = words.translate(str.maketrans("", "", punct))
                speech_words += [word.lower() for word in output.split()
                                 if valid_word(word, True)]
            all_words += speech_words
            dictionary.add_documents([speech_words])
            train_documents.append(speech_words)
    corpus = [dictionary.doc2bow(text) for text in train_documents]
    lda = LdaModel(corpus=corpus,
                   id2word=dictionary,
                   num_topics=GC.N_TOPICS,
                   passes=10,
                   alpha='auto')

    print('{} topics with corresponding top {} words'.format(GC.N_TOPICS, 10))
    pprint(lda.print_topics())

    word_counter = Counter(all_words)
    print('Top {} words in {} speeches of NaMo'.format(GC.N_TOP_WORDS,
                                                       GC.N_SPEECHES))
    pprint(word_counter.most_common(GC.N_TOP_WORDS))
Author: pbamotra, Project: namo-speech-analysis, Lines: 39, Source: explorer.py

Example 4: print

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
from util.TextSimilarity import TextSimilarity
from util.TaskReader import TaskReader

from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel

print("LDA Output: ")

first_num = 244  # offset used to number the topics in the output

task = TaskReader.read("text.txt")
similarity = TextSimilarity('french')
doc_set = similarity.get_modified_text(task.text)
edu_set = similarity.get_modified_text(task.education)

dictionary = Dictionary([doc.split() for doc in edu_set])
# fit a separate one-topic LDA model to each document and print its top words
for i, doc in enumerate(doc_set):
    num = i + first_num
    corpus = [dictionary.doc2bow(doc.split())]
    ldamodel = LdaModel(corpus, num_topics=1, id2word=dictionary, passes=50)
    for _, topic in ldamodel.print_topics(num_topics=1, num_words=6):
        print("Topic № " + str(num) + " : " + topic)
Author: vovanlskspb, Project: machinelearning, Lines: 24, Source: Thematic.py

Example 5: clean

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.ldamodel import LdaModel

# removes symbols from a given word
symbols = [',', '.', '-', '@']
def clean(word):
    word = word.lower()
    for symbol in symbols:
        word = word.replace(symbol, '')
    return word

# load the dictionary that was generated by calling corpora.Dictionary() on TripAdvisor text
id2word = Dictionary.load('scraper/tripadvisor.dict')
# load the corpus that was serialized with corpora.MmCorpus.serialize()
mm = MmCorpus('scraper/tripadvisor.mm')
# run LDA with 100 topics
lda = LdaModel(corpus=mm, id2word=id2word, num_topics=100, update_every=1, chunksize=100, passes=1)
topics = lda.print_topics(num_topics=100)

top_words = []

# collect the top words from each "weight*word + ..." topic string
# (in current gensim, print_topics returns (topic_id, string) tuples)
for _, topic in topics:
    topic = topic.replace('+', '')
    topic_split = topic.split()
    for item in topic_split:
        item = item.strip().split('*')[1]
        top_words.append(clean(item))

# write the top words to a file
with open('scraper/top_words_lda.txt', 'w', encoding='utf-8') as f:
    for word in top_words:
        f.write(word + '\n')
Author: jedijulia, Project: nlp-tourism, Lines: 32, Source: lda.py
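
As an aside, newer gensim versions can hand back the word/probability pairs directly via show_topics(formatted=False), which avoids parsing the formatted topic strings; a sketch of the same top-word extraction under that assumption:

top_words = []
# show_topics(..., formatted=False) yields (topic_id, [(word, prob), ...]) tuples
for _, word_probs in lda.show_topics(num_topics=100, num_words=10, formatted=False):
    for word, _ in word_probs:
        top_words.append(clean(word))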

Example 6: MyCorpus

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
from gensim import corpora, models
from gensim.models.ldamodel import LdaModel
import pymongo

# put the top k words here
top_k_words = [['human', 'graph', 'computer']]

dictionary = corpora.Dictionary(top_k_words)

print(dictionary.token2id)

class MyCorpus(object):
    """Streams documents one at a time instead of holding the whole corpus in memory."""
    def __iter__(self):
        # change this to fetch documents from MongoDB
        for line in open('mycorpus.txt'):
            yield dictionary.doc2bow(line.lower().split())

corpus = MyCorpus()

lda = LdaModel(corpus, num_topics=2, id2word=dictionary)

print(lda.print_topics(2))
Author: Geekking, Project: nosql_new, Lines: 24, Source: lda.py

Example 7: print

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
# (this fragment assumes corpus, dictionary and new_vec were built earlier)
# print(tfidf[vec])
# index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=12)
# sims = index[tfidf[vec]]
# print(list(enumerate(sims)))
corpora.MmCorpus.save_corpus('file.mm', corpus)
# id2word = corpora.Dictionary.load('deerwester.dict')
mmCorpus = corpora.MmCorpus("file.mm")
print(mmCorpus)
lsi = LsiModel(mmCorpus, id2word=dictionary, num_topics=10)
print("lsi:")
# print(lsi[new_vec])
lsi.print_debug(4, 4)
lsi.print_topics(4, 2)
lsi.show_topic(9, 10)  # topic indices run 0..num_topics-1, so 10 would be out of range

lda = LdaModel(mmCorpus, id2word=dictionary, num_topics=10)
lda.print_topics(4, 4)
doc_lda = lda[new_vec]

print("lda:")
# print(doc_lda)

# corpus = [[(0, 1.0), (1, 1.0), (2, 1.0)],
#            [(2, 1.0), (3, 1.0), (4, 1.0), (5, 1.0), (6, 1.0), (8, 1.0)],
#            [(1, 1.0), (3, 1.0), (4, 1.0), (7, 1.0)],
#            [(0, 1.0), (4, 2.0), (7, 1.0)],
#            [(3, 1.0), (5, 1.0), (6, 1.0)],
#            [(9, 1.0)],
#            [(9, 1.0), (10, 1.0)],
#            [(9, 1.0), (10, 1.0), (11, 1.0)],
#            [(8, 1.0), (10, 1.0), (11, 1.0)]]
Author: xun24fd, Project: LDApy, Lines: 33, Source: test.py

Example 8: documents

# Required import: from gensim.models.ldamodel import LdaModel [as alias]
# Or: from gensim.models.ldamodel.LdaModel import print_topics [as alias]
# Note: Dictionary.filter_n_most_frequent() removes the n most frequent
# tokens in place and returns None, so its result cannot be unpacked.
dictionary.filter_n_most_frequent(50)
print(dictionary)

# Convert the list of documents (corpus) into a document-term matrix using the dictionary prepared above.
doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]


# Create the object for the LDA model using gensim and train it on the document-term matrix.
ldamodel = Lda(doc_term_matrix, num_topics=50, id2word=dictionary, passes=50, iterations=500)
ldafile = open('lda_model_sym_wiki.pkl', 'wb')
pickle.dump(ldamodel, ldafile)
ldafile.close()

# Print all 50 topics
for topic in ldamodel.print_topics(num_topics=50, num_words=10):
    print(topic[0] + 1, " ", topic[1], "\n")

Author: ajaybha, Project: Topic-Modelling-on-Wiki-corpus, Lines: 18, Source: wiki_topic_model.py


Note: The gensim.models.ldamodel.LdaModel.print_topics examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs; the snippets were selected from open-source projects contributed by their respective authors. The source code is copyrighted by its original authors; consult each project's license before redistributing or using it, and do not reproduce this article without permission.