本文整理汇总了Python中gensim.corpora.dictionary.Dictionary.from_corpus方法的典型用法代码示例。如果您正苦于以下问题：Python Dictionary.from_corpus方法的具体用法？Python Dictionary.from_corpus怎么用？Python Dictionary.from_corpus使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类gensim.corpora.dictionary.Dictionary的用法示例。
在下文中一共展示了Dictionary.from_corpus方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from gensim.corpora.dictionary import Dictionary [as 别名]
# 或者: from gensim.corpora.dictionary.Dictionary import from_corpus [as 别名]
class tip_rec:
    """Topic-based recommender that scores senders against a piece of text.

    ``train`` fits an LDA topic model on the ``context`` column of a
    DataFrame and accumulates a per-sender topic-affinity profile.
    ``predict`` reports the Euclidean distance between a text's topic
    distribution and one sender's profile (or every sender's profile).
    """

    def __init__(self, num_topics=15):
        """Create an untrained recommender.

        Parameters
        ----------
        num_topics: Number of LDA topics to fit in ``train``.
        """
        self.numtopics = num_topics
        # Template profile (topic id -> 0.0); copied once per sender in train().
        self.topic_dict = dict(enumerate(np.zeros(num_topics)))
        self.user_dict = {}   # sender -> {topic id -> affinity}
        self.model = None     # LDA model, set by train()
        self.worddict = {}    # token id -> term, set by train()
        self.mydict = None    # gensim Dictionary, set by train()

    def train(self, df):
        """Fit the topic model and build the per-sender affinity profiles.

        Parameters
        ----------
        df: DataFrame read through its ``sender``, ``context`` and ``amt``
            columns. Rows with an empty ``context`` are skipped; rows with
            an empty ``amt`` contribute nothing to existing topic entries.
        """
        self.user_dict = {
            sender: self.topic_dict.copy() for sender in df.sender.unique()
        }

        cv = CV(stop_words='english')
        X = cv.fit_transform(df['context'])

        # Invert term -> column index so ids line up with the columns of X.
        # Enumerating the vocabulary keys does not reproduce that mapping.
        # NOTE(review): assumes CV follows the scikit-learn vectorizer
        # convention for ``vocabulary_`` -- confirm.
        self.worddict = {idx: term for term, idx in cv.vocabulary_.items()}

        corpus = matutils.Sparse2Corpus(X, documents_columns=False)
        # from_corpus is called on the class; no throwaway instance is needed.
        self.mydict = Dictionary.from_corpus(corpus, id2word=self.worddict)
        self.model = LatentDA.LdaModel(
            corpus,
            num_topics=self.numtopics,
            passes=20,
            id2word=self.worddict,
        )

        for _, row in df.iterrows():
            text = row['context']
            if text == '':
                continue
            profile = self.user_dict[row.sender]
            bow = self.mydict.doc2bow(text.split())
            for topic_id, weight in self.model[bow]:
                if topic_id in profile:
                    if row.amt == '':
                        continue
                    # Weight the topic probability by the row's amount.
                    profile[topic_id] += weight * float(row.amt)
                else:
                    profile[topic_id] = weight

        # Normalise each profile so its affinities sum to 1.
        for profile in self.user_dict.values():
            total = sum(profile.values())
            if not total:
                # Nothing accumulated for this sender; leave the zeros alone
                # rather than dividing by zero.
                continue
            for topic_id in profile:
                profile[topic_id] = profile[topic_id] / total

    def predict(self, text, username=''):
        """Score how far ``text`` is from the trained sender profiles.

        Parameters
        ----------
        text: Whitespace-separated text to score.
        username: Sender to score against; the empty string (default) scores
            every known sender.

        Returns
        -------
        A dict mapping sender -> distance when ``username`` is empty,
        otherwise a ``(username, distance)`` tuple. The distance is the
        Euclidean norm of the difference between the two topic vectors.

        Raises
        ------
        KeyError: if ``username`` is non-empty and has no profile.
        """
        topics = self.model[self.mydict.doc2bow(text.split())]
        doc_aff = np.zeros(self.numtopics)
        for topic_id, weight in topics:
            doc_aff[topic_id] = weight

        if username == '':
            return {
                user: self._distance(profile, doc_aff)
                for user, profile in self.user_dict.items()
            }
        return (username, self._distance(self.user_dict[username], doc_aff))

    def _distance(self, profile, doc_aff):
        """Return the Euclidean distance between a profile and ``doc_aff``."""
        # Build the vector by topic id: np.array(dict.values()) yields a 0-d
        # object array on Python 3, and dict order is not a topic index.
        user_aff = np.array(
            [profile.get(topic_id, 0.0) for topic_id in range(self.numtopics)],
            dtype=float,
        )
        return np.linalg.norm(user_aff - doc_aff)
示例2: get_topics
# 需要导入模块: from gensim.corpora.dictionary import Dictionary [as 别名]
# 或者: from gensim.corpora.dictionary.Dictionary import from_corpus [as 别名]
def get_topics(cv, train_data, num_topics=20, num_words=5):
    """
    Uses gensim to perform topic modeling.

    Parameters
    ---------
    cv: A TfidfVectorizer instance; only its ``vocabulary_`` mapping
        (term -> column index) is read.
    train_data: A scipy csr_matrix, treated as one document per row.
    num_topics: Number of LDA topics to fit (default 20).
    num_words: Number of top terms requested per topic (default 5).

    Returns
    -------
    Whatever ``LdaModel.top_topics`` returns for the fitted model.
    NOTE(review): the gensim docs describe this as a list of
    (topic terms, coherence score) pairs, not plain strings -- confirm
    against the installed version.
    """
    td_gensim = Sparse2Corpus(train_data, documents_columns=False)
    # Invert term -> column index into the id -> term mapping gensim expects.
    id_to_word = {idv: word for word, idv in cv.vocabulary_.items()}
    dct = Dictionary.from_corpus(td_gensim, id2word=id_to_word)
    lda = LdaModel(corpus=td_gensim, id2word=dct, num_topics=num_topics)
    # NOTE(review): newer gensim releases appear to name this keyword
    # ``topn`` instead of ``num_words`` -- confirm before upgrading.
    return lda.top_topics(corpus=td_gensim, num_words=num_words)