本文整理汇总了Python中lmdb.open方法的典型用法代码示例。如果您正苦于以下问题:Python lmdb.open方法的具体用法?Python lmdb.open怎么用?Python lmdb.open使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lmdb
的用法示例。
在下文中一共展示了lmdb.open方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __init__(
        self,
        db_path,
        db_name,
        transform=None,
        target_transform=None,
        backend='cv2'):
    """Open the single-file LMDB '<db_name>.lmdb' under *db_path* read-only.

    Caches the dataset length, optional class metadata, and the per-index
    key list. NOTE(review): relies on project helpers load_pyarrow/get_key;
    presumably they deserialize pyarrow payloads and build per-index keys
    — confirm against their definitions.
    """
    lmdb_file = os.path.join(db_path, '{}.lmdb'.format(db_name))
    # Read-only, unlocked, no readahead/meminit: suited to multi-worker loading.
    self.env = lmdb.open(lmdb_file,
                         subdir=False,
                         readonly=True, lock=False,
                         readahead=False, meminit=False)
    with self.env.begin() as txn:
        self.length = load_pyarrow(txn.get(b'__len__'))
        try:
            # Class metadata is optional; a missing key surfaces as an
            # AssertionError from load_pyarrow and is deliberately ignored.
            self.classes = load_pyarrow(txn.get(b'classes'))
            self.class_to_idx = load_pyarrow(txn.get(b'class_to_idx'))
        except AssertionError:
            pass
    self.map_list = [get_key(idx) for idx in range(self.length)]
    self.transform = transform
    self.target_transform = target_transform
    self.backend = backend
示例2: __init__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __init__(self, root=None, transform=None, target_transform=None):
    """Open an LMDB dataset at *root* read-only and cache the sample count.

    Exits the process when the environment cannot be opened.
    """
    self.env = lmdb.open(
        root,
        max_readers=1,
        readonly=True,
        lock=False,
        readahead=False,
        meminit=False)
    if not self.env:
        # Failure is an error: fix the 'creat' typo and exit non-zero
        # (the original exited with status 0, which signals success).
        print('cannot create lmdb from %s' % (root))
        sys.exit(1)
    with self.env.begin(write=False) as txn:
        # b'num-samples' holds the record count as an ASCII integer.
        nSamples = int(txn.get(b'num-samples'))
        self.nSamples = nSamples
    self.transform = transform
    self.target_transform = target_transform
示例3: __get_feat_mapper
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __get_feat_mapper(self, path):
    """Scan the avazu CSV at *path* and build per-field value->index maps.

    Returns (feat_mapper, defaults): feat_mapper[i] maps each value of
    field i seen at least self.min_threshold times to a dense index;
    defaults[i] is the fallback index for rare/unseen values.
    """
    feat_cnts = defaultdict(lambda: defaultdict(int))
    with open(path) as f:
        f.readline()  # skip the CSV header line
        pbar = tqdm(f, mininterval=1, smoothing=0.1)
        pbar.set_description('Create avazu dataset cache: counting features')
        for line in pbar:
            fields = line.rstrip('\n').split(',')
            if len(fields) != self.NUM_FEATS + 2:
                continue  # skip malformed rows
            for field_idx in range(1, self.NUM_FEATS + 1):
                feat_cnts[field_idx][fields[field_idx + 1]] += 1
    # Keep only values meeting the frequency threshold, ...
    frequent = {
        field_idx: {feat for feat, cnt in counter.items() if cnt >= self.min_threshold}
        for field_idx, counter in feat_cnts.items()
    }
    # ... then assign each surviving value a dense index.
    feat_mapper = {
        field_idx: {feat: dense_idx for dense_idx, feat in enumerate(feats)}
        for field_idx, feats in frequent.items()
    }
    defaults = {field_idx: len(mapping) for field_idx, mapping in feat_mapper.items()}
    return feat_mapper, defaults
示例4: __yield_buffer
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __yield_buffer(self, path, feat_mapper, defaults, buffer_size=int(1e5)):
    """Yield batches of (key, value) pairs for LMDB insertion (avazu).

    Keys are big-endian uint32 item indices; values are the raw bytes of a
    uint32 array [label, feat_1, ..., feat_NUM_FEATS], mapping unseen
    feature values to the per-field default index.
    """
    item_idx = 0
    buffer = list()
    with open(path) as f:
        f.readline()  # skip the CSV header line
        pbar = tqdm(f, mininterval=1, smoothing=0.1)
        pbar.set_description('Create avazu dataset cache: setup lmdb')
        for line in pbar:
            fields = line.rstrip('\n').split(',')
            if len(fields) != self.NUM_FEATS + 2:
                continue  # skip malformed rows
            record = np.zeros(self.NUM_FEATS + 1, dtype=np.uint32)
            record[0] = int(fields[1])  # click label
            for field_idx in range(1, self.NUM_FEATS + 1):
                record[field_idx] = feat_mapper[field_idx].get(
                    fields[field_idx + 1], defaults[field_idx])
            buffer.append((struct.pack('>I', item_idx), record.tobytes()))
            item_idx += 1
            if item_idx % buffer_size == 0:
                yield buffer
                buffer.clear()
        # Flush the (possibly empty) tail so no records are lost.
        yield buffer
示例5: __get_feat_mapper
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __get_feat_mapper(self, path):
    """Scan the criteo TSV at *path* and build per-field value->index maps.

    Integer fields (1..NUM_INT_FEATS) are bucketed through
    convert_numeric_feature before counting; categorical fields are counted
    verbatim. Returns (feat_mapper, defaults) where feat_mapper[i] maps
    frequent values (count >= self.min_threshold) of field i to dense
    indices and defaults[i] is the out-of-vocabulary index.
    """
    feat_cnts = defaultdict(lambda: defaultdict(int))
    with open(path) as f:
        pbar = tqdm(f, mininterval=1, smoothing=0.1)
        pbar.set_description('Create criteo dataset cache: counting features')
        for line in pbar:
            fields = line.rstrip('\n').split('\t')
            if len(fields) != self.NUM_FEATS + 1:
                continue  # skip malformed rows
            for field_idx in range(1, self.NUM_INT_FEATS + 1):
                feat_cnts[field_idx][convert_numeric_feature(fields[field_idx])] += 1
            for field_idx in range(self.NUM_INT_FEATS + 1, self.NUM_FEATS + 1):
                feat_cnts[field_idx][fields[field_idx]] += 1
    # Keep only values meeting the frequency threshold, ...
    frequent = {
        field_idx: {feat for feat, cnt in counter.items() if cnt >= self.min_threshold}
        for field_idx, counter in feat_cnts.items()
    }
    # ... then assign each surviving value a dense index.
    feat_mapper = {
        field_idx: {feat: dense_idx for dense_idx, feat in enumerate(feats)}
        for field_idx, feats in frequent.items()
    }
    defaults = {field_idx: len(mapping) for field_idx, mapping in feat_mapper.items()}
    return feat_mapper, defaults
示例6: __yield_buffer
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __yield_buffer(self, path, feat_mapper, defaults, buffer_size=int(1e5)):
    """Yield batches of (key, value) pairs for LMDB insertion (criteo).

    Keys are big-endian uint32 item indices; values are the raw bytes of a
    uint32 array [label, feat_1, ..., feat_NUM_FEATS]; integer fields are
    bucketed via convert_numeric_feature before lookup.
    """
    item_idx = 0
    buffer = list()
    with open(path) as f:
        pbar = tqdm(f, mininterval=1, smoothing=0.1)
        pbar.set_description('Create criteo dataset cache: setup lmdb')
        for line in pbar:
            fields = line.rstrip('\n').split('\t')
            if len(fields) != self.NUM_FEATS + 1:
                continue  # skip malformed rows
            record = np.zeros(self.NUM_FEATS + 1, dtype=np.uint32)
            record[0] = int(fields[0])  # click label
            for field_idx in range(1, self.NUM_INT_FEATS + 1):
                record[field_idx] = feat_mapper[field_idx].get(
                    convert_numeric_feature(fields[field_idx]), defaults[field_idx])
            for field_idx in range(self.NUM_INT_FEATS + 1, self.NUM_FEATS + 1):
                record[field_idx] = feat_mapper[field_idx].get(
                    fields[field_idx], defaults[field_idx])
            buffer.append((struct.pack('>I', item_idx), record.tobytes()))
            item_idx += 1
            if item_idx % buffer_size == 0:
                yield buffer
                buffer.clear()
        # Flush the (possibly empty) tail so no records are lost.
        yield buffer
示例7: lmdb_to_TFRecords
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def lmdb_to_TFRecords():
    """Merge the lmdb_0..lmdb_2 shards under data_dir into one TFRecords file.

    Every (key, value) pair becomes a tf.train.Example with 'name' and
    'block' bytes features, written to tfrecord_fn (module-level globals).
    """
    writer = tf.python_io.TFRecordWriter(tfrecord_fn)
    try:
        # collect all lmdbs to write into one TFRecords (at least one lmdb)
        db_paths = [os.path.join(data_dir, 'lmdb_{}'.format(i)) for i in range(3)]
        # Iterate shards directly instead of indexing range(3) in parallel
        # with the list, and close each environment (the original leaked them).
        for db_path in db_paths:
            env = lmdb.open(db_path, readonly=True)
            try:
                with env.begin() as txn:
                    with txn.cursor() as curs:
                        for key, value in curs:
                            print('put key: {} to train tfrecord'.format(key.decode('utf-8')))
                            feature = {
                                'name': __bytes_feature(key),
                                'block': __bytes_feature(value)
                            }
                            example = tf.train.Example(features=tf.train.Features(feature=feature))
                            writer.write(example.SerializeToString())
            finally:
                env.close()
    finally:
        # Ensure the TFRecords file is finalized even if a shard fails.
        writer.close()
示例8: __init__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __init__(self, lmdb_root, shape=None, shuffle=True, transform=None, target_transform=None, train=False, seen=0):
    """Open an LMDB dataset read-only and prepare (optionally shuffled) indices.

    Fixes two Python 3 bugs in the original:
    - LMDB keys are bytes, so txn.get('num-samples') with a str key always
      returned None (and int(None) raises TypeError); use b'num-samples'.
    - random.shuffle mutates its argument in place, which a range object
      does not support; materialize the indices as a list first.
    """
    self.env = lmdb.open(lmdb_root,
                         max_readers=1,
                         readonly=True,
                         lock=False,
                         readahead=False,
                         meminit=False)
    # Keep one long-lived read transaction for the dataset's lifetime.
    self.txn = self.env.begin(write=False)
    self.nSamples = int(self.txn.get(b'num-samples'))
    self.indices = list(range(self.nSamples))
    if shuffle:
        random.shuffle(self.indices)
    self.transform = transform
    self.target_transform = target_transform
    self.train = train
    self.shape = shape
    self.seen = seen
示例9: finalise
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def finalise(self):
    """
    Ensures that the state of the data is flushed and writes the
    provenance for the current operation. The specified 'command' is used
    to fill the corresponding entry in the provenance dictionary.
    """
    self._check_write_modes()
    # Mark the store finalised *before* closing so later readers can
    # distinguish a complete file from a partially written one.
    self.data.attrs[FINALISED_KEY] = True
    if self.path is not None:
        store = self.data.store
        store.close()
        logger.debug("Fixing up LMDB file size")
        # Reopen the now-closed store directly with lmdb (single file, no
        # lock file, writemap so set_mapsize takes effect on disk).
        with lmdb.open(self.path, subdir=False, lock=False, writemap=True) as db:
            # LMDB maps a very large amount of space by default. While this
            # doesn't do any harm, it's annoying because we can't use ls to
            # see the file sizes and the amount of RAM we're mapping can
            # look like it's very large. So, we fix this up so that the
            # map size is equal to the number of pages in use.
            num_pages = db.info()["last_pgno"]
            page_size = db.stat()["psize"]
            db.set_mapsize(num_pages * page_size)
        # Remove the lock file as we don't need it after this point.
        remove_lmdb_lockfile(self.path)
        # Reopen in read-only mode for any subsequent access.
        self._open_readonly()
示例10: createStorage
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def createStorage(storage_type, storage_parm=None):
    """Create one storage backend, or a list of them for inverted files.

    Args:
        storage_type: 'mem' for in-memory storage or 'lmdb' for LMDB-backed.
        storage_parm: optional dict of parameters; recognised keys:
            'num_idx' (int) -- when > 0, return a list of num_idx storages
                (one per inverted-file index) instead of a single one;
            'path' (str, lmdb only) -- LMDB environment path (required);
            'clear' (bool, lmdb only) -- whether to clear existing data.

    Raises:
        ValueError: for an unknown storage_type.
    """
    num_idx = storage_parm.get('num_idx', 0) if storage_parm else 0
    if storage_type == 'mem':
        # create inverted file storage
        if num_idx > 0:
            # range() replaces Python-2-only xrange().
            return [MemStorage() for _ in range(num_idx)]
        # create normal storage
        return MemStorage()
    elif storage_type == 'lmdb':
        path = storage_parm['path']
        clear = storage_parm.get('clear', False)
        # create inverted file storage
        if num_idx > 0:
            env = lmdb.open(path, map_size=2**30, sync=False, max_dbs=num_idx)
            return [LMDBStorage(env, clear, i) for i in range(num_idx)]
        # create normal storage
        env = lmdb.open(path, map_size=2**30, sync=False, max_dbs=1)
        return LMDBStorage(env, clear)
    else:
        # Fixes the 'Wroing' typo; ValueError subclasses Exception, so
        # callers catching the original broad Exception still work.
        raise ValueError('Wrong storage type: %s' % storage_type)
示例11: create_data_reader
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def create_data_reader(
    model,
    name,
    input_data,
):
    """Create a DB reader on *model* for the LMDB at *input_data*.

    NOTE(review): *model* is presumably a caffe2 ModelHelper exposing
    param_init_net — confirm against callers.

    Returns:
        (reader, number_of_examples): the CreateDB reader op output and the
        entry count reported by the LMDB environment's stat().
    """
    reader = model.param_init_net.CreateDB(
        [],
        name=name,
        db=input_data,
        db_type='lmdb',
    )
    # Open the LMDB only long enough to count its entries; close it even
    # if stat() raises (the original leaked the handle on that path).
    lmdb_env = lmdb.open(input_data, readonly=True)
    try:
        number_of_examples = lmdb_env.stat()["entries"]
    finally:
        lmdb_env.close()
    return reader, number_of_examples
示例12: __init__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __init__(self, db_path, transform=None, target_transform=None):
    """Open an LMDB dataset at *db_path* and load (or build) its key cache.

    A full cursor scan over a large database is slow, so the key list is
    pickled to a cache file derived from the db path.
    """
    import lmdb
    self.db_path = db_path
    self.env = lmdb.open(db_path, max_readers=1, readonly=True, lock=False,
                         readahead=False, meminit=False)
    with self.env.begin(write=False) as txn:
        self.length = txn.stat()['entries']
    cache_file = '_cache_' + db_path.replace('/', '_')
    if os.path.isfile(cache_file):
        # 'with' closes the handle; the original leaked it via
        # pickle.load(open(...)).
        with open(cache_file, "rb") as f:
            self.keys = pickle.load(f)
    else:
        with self.env.begin(write=False) as txn:
            self.keys = [key for key, _ in txn.cursor()]
        with open(cache_file, "wb") as f:
            pickle.dump(self.keys, f)
    self.transform = transform
    self.target_transform = target_transform
示例13: __getitem__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __getitem__(self, index):
    """Return the (image, target) pair stored at *index*.

    NOTE(review): target is never populated here, so it stays None unless
    target_transform produces something from None — same as the original.
    """
    img, target = None, None
    with self.env.begin(write=False) as txn:
        imgbuf = txn.get(self.keys[index])
    # Wrap the raw bytes in a seekable buffer and decode as RGB.
    buf = six.BytesIO(imgbuf)
    buf.seek(0)
    img = Image.open(buf).convert('RGB')
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target
示例14: write_lmdb
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def write_lmdb(self, image_dir_list):
    """Store every image in *image_dir_list* into a single LMDB database.

    Keys are zero-padded 8-digit sequence numbers; values are the bytes
    produced by self.read_img. Returns the LMDB directory path.
    """
    lmdb_dir = self.get_store_dir(image_dir_list[0])
    img_num = len(image_dir_list)
    # map_size=1 TiB reserves address space only, not actual disk.
    with lmdb.open(str(lmdb_dir), map_size=int(1099511627776)) as lmdb_env:
        with lmdb_env.begin(write=True) as lmdb_txn:
            self.logger.info('Store database -->' + str(lmdb_dir))
            for seq, im_dir in enumerate(image_dir_list):
                img = self.read_img(im_dir, seq)
                key_id = '%08d' % seq
                lmdb_txn.put(key_id.encode(), img)
                done = seq + 1
                # Progress log every 10k images and at the very end.
                if done % 10000 == 0 or done == img_num:
                    self.logger.info('pass %d, key id : %s' % (done, key_id))
    img_count = len(image_dir_list)
    assert img_count == img_num
    self.logger.info('Total %08d images. Creating Finish' % img_count)
    return lmdb_dir
示例15: __init__
# 需要导入模块: import lmdb [as 别名]
# 或者: from lmdb import open [as 别名]
def __init__(self, root, transform=None, target_transform=None):
    """Open an LMDB dataset at *root* and load (or build) its key cache.

    A full cursor scan over a large database is slow, so the key list is
    pickled to a cache file derived from the root path.
    """
    import lmdb
    self.root = os.path.expanduser(root)
    self.transform = transform
    self.target_transform = target_transform
    self.env = lmdb.open(root, max_readers=1, readonly=True, lock=False,
                         readahead=False, meminit=False)
    with self.env.begin(write=False) as txn:
        self.length = txn.stat()['entries']
    cache_file = '_cache_' + root.replace('/', '_')
    if os.path.isfile(cache_file):
        # 'with' closes the handle; the original leaked it via
        # pickle.load(open(...)).
        with open(cache_file, "rb") as f:
            self.keys = pickle.load(f)
    else:
        with self.env.begin(write=False) as txn:
            self.keys = [key for key, _ in txn.cursor()]
        with open(cache_file, "wb") as f:
            pickle.dump(self.keys, f)