This article collects typical usage examples of the SqliteDict.update method from the Python sqlitedict package. If you are wondering what SqliteDict.update does, how to use it, or want to see it in real code, the curated examples below may help. You can also explore further usage examples of its containing class, sqlitedict.SqliteDict.
The following shows 7 code examples of SqliteDict.update, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
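Before the examples, here is a minimal, self-contained sketch of the pattern they all share: open a SqliteDict, bulk-load key/value pairs with update(), then commit() and close(). The file name and data below are illustrative only.

from sqlitedict import SqliteDict

db = SqliteDict('./example.sqlite', autocommit=False)  # illustrative path
db.update({'key1': 'value1', 'key2': [1, 2, 3]})       # any pickleable values
db.commit()  # with autocommit=False, changes persist only after commit()
db.close()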
Example 1: _persist_v0

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def _persist_v0(file_path, zg):
    print 'Creating db...'
    persisted = SqliteDict(file_path, autocommit=False)
    print 'Updating data...'
    persisted.update(zg.country_postal_codes)
    print 'Committing data...'
    persisted.commit()
Example 2: main

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def main(data_dir):
    print 'Loading data...'
    zg = Zipgun(data_dir, force_text=True)
    print 'Creating db...'
    persisted = SqliteDict(os.path.join(data_dir, DATA_FILE), autocommit=False)
    print 'Updating data...'
    persisted.update(zg.country_postal_codes)
    print 'Committing data...'
    persisted.commit()
Example 3: _persist_v1

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def _persist_v1(file_path, zg):
    print 'Creating meta db...'
    zipgun_info = SqliteDict(
        file_path, tablename='zipgun_info', autocommit=False)
    zipgun_info['version'] = 1
    zipgun_info['country_codes'] = zg.country_postal_codes.keys()
    zipgun_info.commit()
    for country_code in zg.country_postal_codes:
        print 'Creating {} db...'.format(country_code)
        country_data = SqliteDict(
            file_path, tablename='zg_{}'.format(country_code),
            autocommit=False)
        country_data.update(zg.country_postal_codes[country_code])
        country_data.commit()
        time.sleep(1.0)  # Pretty bullshit
        country_data.close()
    zipgun_info.close()
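Example 3 shows that one SQLite file can hold many named tables, each opened as a separate SqliteDict via the tablename argument. Here is a hypothetical read-back sketch for the layout written above (the file path is illustrative; the table names follow the 'zg_' convention used by _persist_v1):

from sqlitedict import SqliteDict

info = SqliteDict('./zipgun.sqlite', tablename='zipgun_info')
for code in info['country_codes']:
    country = SqliteDict('./zipgun.sqlite', tablename='zg_{}'.format(code))
    print(len(country))  # number of postal-code entries for this country
    country.close()
info.close()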
Example 4: reset

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def reset(texts, index_dic=True, tfidf=True, hdp=False, lda=True, sim=False):
    total_start = timeit.default_timer()
    make_index_time = 0
    make_dict_time = 0
    make_lda_time = 0
    make_tfidf_time = 0
    sim_time = 0
    hdptopicnum = 0
    if index_dic:
        f = [i.split(',') for i in texts.readlines()]
        logging.info('Create id & ac_id list')
        ids = [f[i][1] for i in range(len(f))]
        ac_ids = [f[i][0] for i in range(len(f))]
        logging.info('Create contents list')
        contents = []
        for i in range(len(f)):
            if len(f[i]) == 3:
                contents.append(f[i][2].strip().split(':'))
            else:
                contents.append([])
        # make index
        logging.info('***********Now Make Index by sqlitedict***********')
        timer_start = timeit.default_timer()
        pos2paid = zip(range(len(f)), ac_ids)
        paid2pos_rel = {}
        for key, paid in groupby(sorted(pos2paid, key=itemgetter(1)), key=itemgetter(1)):
            paid2pos_rel.update({int(key): [i[0] for i in paid]})
        id2pos_rel = dict(zip(ids, range(len(f))))
        pos2id_rel = dict(zip(range(len(f)), ids))
        id2pos = SqliteDict(filename=gl.res + '/resource/id2pos', autocommit=True)
        id2pos.clear()
        id2pos.update(id2pos_rel)
        id2pos.close()
        pos2id = SqliteDict(filename=gl.res + '/resource/pos2id', autocommit=True)
        pos2id.clear()
        pos2id.update(pos2id_rel)
        pos2id.close()
        paid2pos = SqliteDict(filename=gl.res + '/resource/paid2pos', autocommit=True)
        paid2pos.clear()
        paid2pos.update(paid2pos_rel)
        paid2pos.close()
        timer_end = timeit.default_timer()
        make_index_time = timer_end - timer_start
        # make dictionary
        logging.info('***********Now Make Dictionary***********')
        timer_start = timeit.default_timer()
        dic = corpora.Dictionary(contents)
        # prune the dictionary to keep it manageable
        dic.filter_extremes(no_below=20, no_above=0.1, keep_n=None)
        dic.save(gl.res + '/resource/dict')
        timer_end = timeit.default_timer()
        make_dict_time = timer_end - timer_start
        # make corpus
        logging.info('***********Now Make Corpus***********')
        temps = []
        for i, t in enumerate(contents):
            temps.append(dic.doc2bow(t))
            if i % 10000 == 0:
                logging.info('make corpus ' + str(i) + ' articles')
        corpus = temps
        corpora.MmCorpus.serialize(gl.res + '/resource/corpus', corpus)
    if tfidf:
        # train the tf-idf model
        logging.info('***********Now Training TF-IDF Model***********')
        timer_start = timeit.default_timer()
        corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
        tfidf = models.TfidfModel(corpus)
        tfidf.save(gl.res + '/resource/tfidf')
        timer_end = timeit.default_timer()
        make_tfidf_time = timer_end - timer_start
    if hdp:
        gc.collect()
        corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
        dic = corpora.Dictionary.load(gl.res + '/resource/dict')
        hdpmodel = models.hdpmodel.HdpModel(corpus, id2word=dic)
        hdptopicnum = len(hdpmodel.print_topics(topics=-1, topn=10))
        logging.info('hdptopicnum is {}'.format(hdptopicnum))
    if lda:
        # train the lda model
        gc.collect()
        tfidf = models.TfidfModel.load(gl.res + '/resource/tfidf')
        corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
        dic = corpora.Dictionary.load(gl.res + '/resource/dict')
        corpus_tfidf = tfidf[corpus]
        logging.info('***********Now Training LDA Model***********')
        timer_start = timeit.default_timer()
        if hdptopicnum != 0:
            gl.topicCount = hdptopicnum
        lda = models.LdaMulticore(corpus_tfidf, id2word=dic, chunksize=gl.chunksize,
# ... (the remainder of this example is omitted) ...
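Note the contrast with Example 5 below: reset() calls clear() on each index table before update(), rebuilding id2pos, pos2id and paid2pos from scratch, while merge() skips clear() and uses update() to extend the existing tables in place.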
Example 5: merge

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def merge(texts, index_dic=True, tfidf=True, lda=True, sim=False):
    total_start = timeit.default_timer()
    make_index_time = 0
    make_dict_time = 0
    make_lda_time = 0
    make_tfidf_time = 0
    sim_time = 0
    if index_dic:
        f = [i.split(',') for i in texts.readlines()]
        logging.info('Create id & ac_id list')
        ids = [f[i][0] for i in range(len(f))]
        ac_ids = [f[i][1] for i in range(len(f))]
        logging.info('Create contents list')
        contents = []
        for i in range(len(f)):
            if len(f[i]) == 3:
                contents.append(f[i][2].strip().split(':'))
            else:
                contents.append([])
        # merge index
        logging.info('***********Now merge index by sqlitedict***********')
        timer_start = timeit.default_timer()
        old_corpus_len = len(corpora.MmCorpus(gl.res + '/resource/corpus'))
        pos2paid = zip(range(old_corpus_len, old_corpus_len + len(f)), ac_ids)
        paid2pos_new = {}
        for key, paid in groupby(sorted(pos2paid, key=itemgetter(1)), key=itemgetter(1)):
            paid2pos_new.update({int(key): [i[0] for i in paid]})
        id2pos_new = dict(zip(ids, range(old_corpus_len, old_corpus_len + len(f))))
        pos2id_new = dict(zip(range(old_corpus_len, old_corpus_len + len(f)), ids))
        id2pos = SqliteDict(filename=gl.res + '/resource/id2pos', autocommit=True)
        id2pos.update(id2pos_new)
        id2pos.close()
        pos2id = SqliteDict(filename=gl.res + '/resource/pos2id', autocommit=True)
        pos2id.update(pos2id_new)
        pos2id.close()
        paid2pos = SqliteDict(filename=gl.res + '/resource/paid2pos', autocommit=True)
        x = [set(paid2pos_new.keys()), set([int(i) for i in paid2pos.iterkeys()])]
        for i in list(set.intersection(*x)):  # update duplicate keys
            temp = list(chain(paid2pos[i], paid2pos_new[i]))
            paid2pos.update({int(i): temp})
        paid2pos.close()
        timer_end = timeit.default_timer()
        make_index_time = timer_end - timer_start
        # merge dictionary
        logging.info('***********Now merge Dictionary***********')
        timer_start = timeit.default_timer()
        newDict = corpora.Dictionary(contents)
        newDict.filter_extremes(no_below=20, no_above=0.1, keep_n=None)
        dic = corpora.Dictionary.load(gl.res + '/resource/dict')
        dic.merge_with(newDict)
        dic.save(gl.res + '/resource/dict')
        timer_end = timeit.default_timer()
        make_dict_time = timer_end - timer_start
        # merge corpus
        logging.info('***********Now merge Corpus***********')
        temps = []
        for i, t in enumerate(contents):
            temps.append(dic.doc2bow(t))
            if i % 10000 == 0:
                logging.info('make corpus ' + str(i) + ' articles')
        corpora.MmCorpus.serialize(gl.res + '/resource/new_c', temps)
        gc.collect()
        corpus = corpora.MmCorpus(gl.res + '/resource/corpus')
        new_corpus = corpora.MmCorpus(gl.res + '/resource/new_c')
        merged_corpus = chain(corpus, new_corpus)
        corpora.MmCorpus.serialize(gl.res + '/resource/merged_c', merged_corpus)  # overwrite corpus
        for filename in glob.glob(gl.res + '/resource/*'):
            if filename.endswith('corpus') or filename.endswith('corpus.index') \
                    or filename.endswith('new_c') or filename.endswith('new_c.index'):  # remove stale corpus files
                os.unlink(filename)
            if filename.endswith('merged_c'):  # rename to corpus
                os.rename(filename, gl.res + '/resource/corpus')
            if filename.endswith('merged_c.index'):
                os.rename(filename, gl.res + '/resource/corpus.index')
    if tfidf:
        # merge the tf-idf model
        gc.collect()
        logging.info('***********Now merge TF-IDF model***********')
        timer_start = timeit.default_timer()
        for filename in glob.glob(gl.res + '/resource/*'):  # back up the old model
            if filename.endswith('tfidf'):
                os.rename(filename, filename + '_' + gl.c_time)
        corpus = corpora.MmCorpus(gl.res + '/resource/corpus')  # reload corpus
        tfidf = models.TfidfModel(corpus)
        tfidf.save(gl.res + '/resource/tfidf')
        timer_end = timeit.default_timer()
        make_tfidf_time = timer_end - timer_start
    if lda:
        # merge the lda model
        gc.collect()
        tfidf = models.TfidfModel.load(gl.res + '/resource/tfidf')
# ... (the remainder of this example is omitted) ...
Example 6: main

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
def main(result_file, site_file, constant_modification_list=None, variable_modification_list=None,
         enzyme_info=None, n_processes=4, output_file=None):
    if output_file is None:
        output_file = os.path.splitext(result_file)[0] + ".db"
    else:
        output_file += ".db"
    modification_table = RestrictedModificationTable.bootstrap(constant_modification_list, variable_modification_list)
    if constant_modification_list is None and variable_modification_list is None:
        modification_table = ModificationTable.bootstrap()
    if isinstance(site_file, basestring):
        site_list = [line.strip() for line in open(site_file, "r")]
        site_list = list(map(int, site_list))
    else:
        site_list = site_file
    compo_dict = csv.DictReader(open(result_file, "r"), delimiter=",")
    colnames = compo_dict.fieldnames
    glycan_identity = get_glycan_identities(colnames)
    enzyme_info = map(get_enzyme, enzyme_info)
    tag = datetime.datetime.strftime(datetime.datetime.now(), "%Y%m%d-%H%M%S")
    metadata = {
        "glycan_identities": glycan_identity,
        "constant_modifications": constant_modification_list,
        "variable_modifications": variable_modification_list,
        "site_list": site_list,
        "ms1_output_file": result_file,
        "enzyme": enzyme_info,
        "tag": tag,
        "enable_partial_hexnac_match": constants.PARTIAL_HEXNAC_LOSS
    }
    metadata_store = SqliteDict(output_file, tablename="metadata", flag='n')
    metadata_store.update(metadata)
    metadata_store.commit()
    theoretical_search_space_store = SqliteDict(output_file, tablename="theoretical_search_space")
    pool = multiprocessing.Pool(n_processes)
    task_fn = functools.partial(process_predicted_ms1_ion, modification_table=modification_table,
                                site_list=site_list, glycan_identity=glycan_identity)
    cntr = 0
    if n_processes > 1:
        logger.debug("Building theoretical sequences concurrently")
        for res in itertools.chain.from_iterable(pool.imap(task_fn, compo_dict, chunksize=500)):
            theoretical_search_space_store[cntr] = res
            cntr += 1
    else:
        logger.debug("Building theoretical sequences sequentially")
        for row in compo_dict:
            res = task_fn(row)
            for item in res:
                theoretical_search_space_store[cntr] = item
                cntr += 1
            if (cntr % 10000) == 0:
                theoretical_search_space_store.commit()
                logger.info("Committing, %d records made", cntr)
    theoretical_search_space_store.commit()
    theoretical_search_space_store.close()
    pool.close()
    pool.join()
    pool.terminate()
    logger.info("Hypothesis building complete")
    return output_file
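Note the flag='n' argument when the metadata table is opened: in sqlitedict, flag='n' creates the database afresh (an existing file is removed first), so the output file starts clean. The theoretical_search_space table is then opened on the same file with the default flag, so both tables share one SQLite database.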
Example 7: Bucket

# Required import: from sqlitedict import SqliteDict [as alias]
# Or: from sqlitedict.SqliteDict import update [as alias]
class Bucket(object):

    def __init__(self, bucket_name, storage_path=None):
        ''' Bucket init
        - if the bucket exists, the meta parameter will be ignored
        '''
        if bucket_name and isinstance(bucket_name, (str, unicode)) and re.match(r"^[a-z0-9\.\-_]+$", bucket_name, re.I):
            self._name = bucket_name.strip()
        else:
            raise falcon.HTTPInvalidParam(
                "The parameter shall contain only alpha-numeric characters, value: '%s'" % bucket_name,
                param_name='name'
            )
        self._bucket_path = None
        if storage_path and os.path.exists(storage_path):
            self._bucket_path = os.path.join(storage_path, self._name)
        else:
            raise falcon.HTTPInternalServerError(
                title='IncorrectStoragePath',
                description='The storage path is incorrect, "%s"' % storage_path
            )
        if self._bucket_path and os.path.exists(self._bucket_path):
            self._meta = SqliteDict(os.path.join(self._bucket_path, 'metadata.sqlite'), 'bucket', autocommit=True)
        else:
            self._meta = SqliteDict(':memory:', 'bucket', autocommit=True)

    @property
    def bucket_path(self):
        return self._bucket_path

    @property
    def metadata(self):
        return dict(self._meta)

    @metadata.setter
    def metadata(self, value):
        if value and isinstance(value, dict):
            self._meta.update(value)
        else:
            raise RuntimeError('Incorrect metadata type. Found "%s", expected "dict"' % type(value))

    def exists(self):
        ''' check whether the bucket exists
        '''
        return bool(self.bucket_path and os.path.exists(self.bucket_path))

    def create(self):
        ''' create a new bucket
        '''
        if self.exists():
            raise falcon.HTTPConflict(
                title='BucketAlreadyExists',
                description="The requested bucket name '%s' is not available. Please select a different name and try again." % self._name
            )
        # prepare the bucket directory
        try:
            os.makedirs(self.bucket_path)
            os.makedirs(os.path.join(self.bucket_path, 'data'))
            os.makedirs(os.path.join(self.bucket_path, 'tmp'))
        except IOError, err:
            raise falcon.HTTPInternalServerError(
                title='BucketCreationError',
                description='The path to the bucket cannot be created, "%s"' % self.bucket_path
            )
        # create the metadata file in the bucket directory, copying over any in-memory metadata
        _meta = self._meta
        self._meta = SqliteDict(os.path.join(self.bucket_path, 'metadata.sqlite'), 'bucket', autocommit=True)
        self._meta.update(_meta)
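A hypothetical usage of the class above (the bucket name, storage path, and metadata values are illustrative, and the storage path is assumed to exist already):

bucket = Bucket('my-bucket', storage_path='/var/data/buckets')
if not bucket.exists():
    bucket.create()
# The metadata setter delegates to SqliteDict.update under the hood.
bucket.metadata = {'owner': 'alice', 'created': '2015-01-01'}
print(bucket.metadata)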