本文整理汇总了 Python 中 sqlitedict.SqliteDict.clear 方法的典型用法代码示例。如果您正苦于以下问题:Python SqliteDict.clear 方法的具体用法是什么?Python SqliteDict.clear 怎么用?Python SqliteDict.clear 有哪些使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sqlitedict.SqliteDict 的用法示例。
在下文中一共展示了 SqliteDict.clear 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: reset
# 需要导入模块: from sqlitedict import SqliteDict [as 别名]
# 注意: clear 是 SqliteDict 类的方法, 无法通过 "from sqlitedict.SqliteDict import clear" 单独导入; 请在 SqliteDict 实例上调用, 例如 d.clear()
def reset(texts, index_dic=True, tfidf=True, hdp=False, lda=True, sim=False):
total_start = timeit.default_timer()
make_index_time = 0
make_dict_time = 0
make_lda_time = 0
make_tfidf_time = 0
sim_time = 0
hdptopicnum = 0
if index_dic:
f = [i.split(',') for i in texts.readlines()]
logging.info('Create id & ac_id list')
ids = [f[i][1] for i in range(len(f))]
ac_ids = [f[i][0] for i in range(len(f))]
logging.info('Create contents list')
contents = []
for i in range(len(f)):
if len(f[i]) == 3:
contents.append(f[i][2].strip().split(':'))
else:
contents.append([])
# make index
logging.info('***********Now Make Index by sqlitedict***********')
timer_start = timeit.default_timer()
pos2paid = zip(range(len(f)), ac_ids)
paid2pos_rel = {}
for key, paid in groupby(sorted(pos2paid, key=itemgetter(1)), key=itemgetter(1)):
paid2pos_rel.update({int(key): [i[0] for i in paid]})
id2pos_rel = dict(zip(ids, range(len(f))))
pos2id_rel = dict(zip(range(len(f)), ids))
id2pos = SqliteDict(filename=gl.res + '/resource/id2pos', autocommit=True)
id2pos.clear()
id2pos.update(id2pos_rel)
id2pos.close()
pos2id = SqliteDict(filename=gl.res + '/resource/pos2id', autocommit=True)
pos2id.clear()
pos2id.update(pos2id_rel)
pos2id.close()
paid2pos = SqliteDict(filename=gl.res + '/resource/paid2pos', autocommit=True)
paid2pos.clear()
paid2pos.update(paid2pos_rel)
paid2pos.close()
timer_end = timeit.default_timer()
make_index_time = timer_end - timer_start
# make dict
logging.info('***********Now Make Dictionary***********')
timer_start = timeit.default_timer()
dic = corpora.Dictionary(contents)
############## optimized dictionary
dic.filter_extremes(no_below=20, no_above=0.1, keep_n=None)
##############
dic.save(gl.res + '/resource/dict')
timer_end = timeit.default_timer()
make_dict_time = timer_end - timer_start
# make corpus
logging.info('***********Now Make Corpus***********')
temps = []
for i, t in enumerate(contents):
temps.append(dic.doc2bow(t))
if i % 10000 == 0:
logging.info('make corpus ' + str(i) + ' articles')
corpus = temps
corpora.MmCorpus.serialize(gl.res + '/resource/corpus', corpus)
if tfidf:
# do tfidf train
logging.info('***********Now Training TF-IDF Model***********')
timer_start = timeit.default_timer()
corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
tfidf = models.TfidfModel(corpus)
tfidf.save(gl.res + '/resource/tfidf')
timer_end = timeit.default_timer()
make_tfidf_time = timer_end - timer_start
if hdp:
gc.collect()
corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
dic = corpora.Dictionary.load(gl.res + '/resource/dict')
hdpmodel = models.hdpmodel.HdpModel(corpus, id2word=dic)
hdptopicnum = len(hdpmodel.print_topics(topics=-1, topn=10))
logging.info('hdptopicnum is {}'.format(hdptopicnum))
if lda:
# do lda train
gc.collect()
tfidf = models.TfidfModel.load(gl.res + '/resource/tfidf')
corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
dic = corpora.Dictionary.load(gl.res + '/resource/dict')
corpus_tfidf = tfidf[corpus]
logging.info('***********Now Training LDA Model***********')
timer_start = timeit.default_timer()
if not hdptopicnum == 0:
gl.topicCount = hdptopicnum
lda = models.LdaMulticore(corpus_tfidf, id2word=dic, chunksize=gl.chunksize,
#.........这里部分代码省略.........