本文整理汇总了Python中bsddb.btopen方法的典型用法代码示例。如果您正苦于以下问题:Python bsddb.btopen方法的具体用法?Python bsddb.btopen怎么用?Python bsddb.btopen使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块bsddb的用法示例。
在下文中一共展示了bsddb.btopen方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def __init__(self, resource_prefix):
    """
    Open the knowledge-resource databases in read-only mode.

    :param resource_prefix: the resource directory and file prefix; every
        database file name is this prefix followed by a fixed suffix
    """
    # (attribute name, file suffix) pairs, opened in exactly this order.
    db_suffixes = (
        ('term_to_id', '_term_to_id.db'),
        ('id_to_term', '_id_to_term.db'),
        ('path_to_id', '_path_to_id.db'),
        ('id_to_path', '_id_to_path.db'),
        ('l2r_edges', '_l2r.db'),
    )
    for attr_name, suffix in db_suffixes:
        setattr(self, attr_name, bsddb.btopen(resource_prefix + suffix, 'r'))
示例2: main
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def _write_id_triplets(lines, f_out, term_to_id, path_to_id):
    """
    Translate tab-separated text triplets into tab-separated ID triplets.

    :param lines: iterable of text lines, each expected to hold exactly
        three tab-separated fields (x, y, path)
    :param f_out: writable file-like object receiving one ID triplet per line
    :param term_to_id: mapping from term to its ID (indexed with ``[]``)
    :param path_to_id: mapping from path to its ID (queried with ``.get``)
    :raises KeyError: if x or y is missing from ``term_to_id``
    """
    for line in lines:
        try:
            x, y, path = line.strip().split('\t')
        except ValueError:
            # Wrong number of fields: echo the offending line and skip it.
            print(line)
            continue

        # Terms are looked up before the path, so an unknown term raises
        # KeyError even when the path would have been dropped anyway.
        x_id, y_id = term_to_id[x], term_to_id[y]

        # Paths absent from the path DB (presumably the infrequent ones that
        # were filtered out earlier -- confirm) are silently dropped.
        path_id = path_to_id.get(path, -1)
        if path_id != -1:
            f_out.write('\t'.join(map(str, (x_id, y_id, path_id))) + '\n')


def main():
    """
    Second step of resource creation: convert text triplets to ID triplets.

    Reads the command-line arguments with docopt, opens the term and path
    lookup databases and writes the converted triplets to
    ``<triplet_file>_id``.
    """
    from contextlib import closing

    # Get the arguments
    args = docopt("""Creates a knowledge resource from triplets file. Second step, uses the resource files
already created and converts the textual triplet file to a triplet file with IDs.
Usage:
create_resource_from_corpus_2.py <triplet_file> <resource_prefix>
<triplet_file> = a file containing the text triplets, formated as X\tY\tpath.
You can run this script on multiple portions of the triplet file at once and concatenate the output.
<resource_prefix> = the file names' prefix for the resource files
""")
    triplet_file = args['<triplet_file>']
    resource_prefix = args['<resource_prefix>']

    # Load the resource DBs read-only: they are only queried here, several
    # copies of this script may run at once, and 'r' fails fast on a wrong
    # prefix instead of silently creating empty database files.
    with closing(bsddb.btopen(resource_prefix + '_term_to_id.db', 'r')) as term_to_id_db:
        with closing(bsddb.btopen(resource_prefix + '_path_to_id.db', 'r')) as path_to_id_db:
            with codecs.open(triplet_file) as f_in:
                with codecs.open(triplet_file + '_id', 'w') as f_out:
                    _write_id_triplets(f_in, f_out, term_to_id_db, path_to_id_db)
示例3: test_keyordering
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def test_keyordering(self):
    """Check that a btopen database walks the keys of self.d in sorted order."""
    # Only the btopen open method is checked; any other one is a no-op here.
    if self.openmethod[0] is bsddb.btopen:
        expected = sorted(self.d.keys())

        # Cursor moves are order-dependent: first -> next -> last -> previous.
        cursor_steps = (
            (self.f.first, 0),
            (self.f.next, 1),
            (self.f.last, -1),
            (self.f.previous, -2),
        )
        for move, index in cursor_steps:
            self.assertEqual(move()[0], expected[index])

        # Plain iteration must yield every key, in the same sorted order.
        self.assertEqual(list(self.f), expected)
示例4: db_dump
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def _hex_line(data):
    """Return one dump line: a space, two hex digits per character, a newline."""
    return " %s\n" % "".join("%02x" % ord(c) for c in data)


def db_dump(filename, outfile=sys.stdout):
    """
    Write a hex text dump of a bsddb database file to *outfile*.

    The file is opened read-only as a hash database first; if bsddb rejects
    it, it is reopened as a btree. The output is a four-line header
    (VERSION, format, type, HEADER=END), then one hex-encoded line per key
    and one per value, then ``DATA=END``.
    NOTE(review): the layout appears to mirror Berkeley DB's db_dump text
    format -- confirm before feeding it to db_load.

    :param filename: path of the database file to dump
    :param outfile: writable file-like object (defaults to ``sys.stdout``)
    :raises bsddb.error: if the file can be opened neither as hash nor btree
    """
    try:
        f = bsddb.hashopen(filename, 'r')
        db_type = "hash"
    except bsddb.error:
        # Not a hash database (or unreadable): fall back to btree. A second
        # failure propagates to the caller.
        f = bsddb.btopen(filename, 'r')
        db_type = "btree"

    try:
        outfile.write("VERSION=3\n")  # magic
        outfile.write("format=bytevalue\n")
        outfile.write("type=%s\n" % (db_type))
        outfile.write("HEADER=END\n")
        for key, value in f.iteritems():
            outfile.write(_hex_line(key))
            outfile.write(_hex_line(value))
        outfile.write("DATA=END\n")
    finally:
        # Release the database handle even if writing fails midway.
        f.close()
#-----------------------------------------------------------------------------
# vim:ft=python
示例5: main
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def main():
    """
    Third step of resource creation: build the '_l2r.db' edge database.

    Reads the command-line arguments with docopt, groups the lines of the
    ID triplet file by their (x, y) pair and stores, under the key
    ``'x###y'``, a comma-separated list of ``'path:count'`` entries.
    Lines that do not have exactly four tab-separated fields are printed
    and skipped.
    """
    # Get the arguments
    args = docopt("""Creates a knowledge resource from triplets file. Third step, uses the ID-based triplet file
and converts it to the '_l2r.db' file.
Usage:
create_resource_from_corpus_3.py <id_triplet_file> <resource_prefix>
<id_triplet_file> = a file containing the int triplets, formated as X_id\tY_id\tpath_id\tcount, where
count is the number of times X and Y occurred together in this path. You can obtain such a file by
counting the number of occurrences of each line in the file produced by the second step, e.g.:
awk '{i[$0]++} END{for(x in i){print x"\t"i[x]}}' triplet_file > id_triplet_file
If you split the files in the second step, apply this command to each one of them, and then sum them up, e.g.:
for each i, run: awk '{i[$0]++} END{for(x in i){print x"\t"i[x]}}' triplet_file_i > id_triplet_file_i
cat id_triplet_file_* > id_triplet_file_temp
Then, run: awk -F$'\t' '{i[$1,"\t",$2,"\t",$3]+=$4} END{for(x in i){print x"\t"i[x]}}' id_triplet_file_temp > id_triplet_file
<resource_prefix> = the file names' prefix for the resource files
""")
    id_triplet_file = args['<id_triplet_file>']
    resource_prefix = args['<resource_prefix>']

    l2r_db = bsddb.btopen(resource_prefix + '_l2r.db', 'c')
    try:
        # Everything is gathered in memory first and written in one pass.
        # Lists joined once avoid repeated string concatenation per key.
        edges = defaultdict(list)
        with codecs.open(id_triplet_file) as f_in:
            for line in tqdm(f_in):
                try:
                    x, y, path, count = line.strip().split('\t')
                except ValueError:
                    # Wrong number of fields: echo the line and skip it.
                    print(line)
                    continue
                edges['%s###%s' % (x, y)].append('%s:%s' % (path, count))

        for key in edges:
            l2r_db[key] = ','.join(edges[key])
        l2r_db.sync()
    finally:
        # Release the database handle even if reading or writing fails.
        l2r_db.close()
示例6: __init_dbd__
# 需要导入模块: import bsddb [as 别名]
# 或者: from bsddb import btopen [as 别名]
def __init_dbd__(self):
    """
    Delete any existing site databases and create fresh ones.

    Removes 'all_sites.db' and 'visited.db' (paths relative to the current
    working directory) when they exist, then opens both with
    ``bsddb.btopen(..., flag='c')`` and stores the handles on
    ``self.all_sites`` and ``self.visited``.

    Creation is best-effort: on a bsddb error a message is printed and the
    method returns normally, so one or both attributes may be left unset.
    """
    print("prepare to initial the bdb")
    print("[#] Check if the db exist")
    for db_file in ('all_sites.db', 'visited.db'):
        if os.path.exists(db_file):
            os.remove(db_file)
    print("[#] Cleared the db")

    # bsddb.btopen(path, 'c') returns a dict-like object with string keys and
    # values; besides item access, keys(), iteritems() and `in`, it offers
    # ordered cursor moves (first/next/last/previous/set_location) and sync().
    try:
        self.all_sites = bsddb.btopen(file='all_sites.db', flag='c')
        self.visited = bsddb.btopen(file='visited.db', flag='c')
    except bsddb.error:
        # Only database errors are treated as "cannot create"; interrupts
        # and programming errors are no longer swallowed.
        print("[!]Bad ! Can't create BDB!")
    else:
        print("[*]Success init BDB")