This article collects typical usage examples of the Python function zarr.open. If you are wondering what zarr.open does and how to use it in practice, the curated examples below may help. You can also browse further usage examples from the zarr module.
The following presents 15 code examples of zarr.open, drawn from open-source projects and sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
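Before the project examples, a minimal sketch of the call itself (the path 'example.zarr' is hypothetical; zarr.open returns a zarr.Array or zarr.Group depending on what the store contains):

import numpy as np
import zarr

# mode='w' creates (or truncates) an on-disk array with the given shape/chunks.
z = zarr.open('example.zarr', mode='w', shape=(100, 100), chunks=(10, 10), dtype='f8')
z[:] = np.random.random((100, 100))

# Re-open read-only; the same call resolves to the existing array.
z2 = zarr.open('example.zarr', mode='r')
assert z2.shape == (100, 100) and z2.chunks == (10, 10)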
Example 1: finalise
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def finalise(self):
"""
Ensures that the state of the data is flushed and writes the
provenance for the current operation. The specified 'command' is used
to fill the corresponding entry in the provenance dictionary.
"""
self._check_write_modes()
self.data.attrs[FINALISED_KEY] = True
if self.path is not None:
store = self.data.store
store.close()
logger.debug("Fixing up LMDB file size")
with lmdb.open(self.path, subdir=False, lock=False, writemap=True) as db:
# LMDB maps a very large amount of space by default. While this
# doesn't do any harm, it's annoying because we can't use ls to
# see the file sizes and the amount of RAM we're mapping can
# look like it's very large. So, we fix this up so that the
# map size is equal to the number of pages in use.
num_pages = db.info()["last_pgno"]
page_size = db.stat()["psize"]
db.set_mapsize(num_pages * page_size)
# Remove the lock file as we don't need it after this point.
remove_lmdb_lockfile(self.path)
self._open_readonly()
Example 2: test_update_coords_only
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_update_coords_only(self):
ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
delete_list = ['geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lat_units', 'geospatial_lon_max',
'geospatial_lon_min', 'geospatial_lon_units', 'time_coverage_end', 'time_coverage_start']
    # Snapshot the keys so deletions don't interfere with iteration.
    for attr in list(ds1.attrs.keys()):
        if attr in delete_list:
            del ds1.attrs[attr]
result = self.invoke_cli(['edit', TEST_CUBE_ZARR_COORDS, '-o', TEST_CUBE_ZARR_EDIT, '-C'])
self.assertEqual(0, result.exit_code)
ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
ds2 = zarr.open(TEST_CUBE_ZARR_EDIT)
for attr in delete_list:
self.assertNotIn(attr, ds1.attrs.keys())
    self.assertEqual(len(ds1), len(ds2))
self.assertIn('geospatial_lat_max', ds2.attrs.keys())
self.assertIn('geospatial_lat_min', ds2.attrs.keys())
self.assertIn('geospatial_lat_resolution', ds2.attrs.keys())
self.assertIn('geospatial_lat_units', ds2.attrs.keys())
self.assertIn('geospatial_lon_max', ds2.attrs.keys())
    self.assertEqual(180.0, ds2.attrs['geospatial_lon_max'])
    self.assertEqual(-180.0, ds2.attrs['geospatial_lon_min'])
    self.assertEqual('2010-01-04T00:00:00.000000000', ds2.attrs['time_coverage_end'])
    self.assertEqual('2010-01-01T00:00:00.000000000', ds2.attrs['time_coverage_start'])
    self.assertEqual('degrees_east', ds2.attrs['geospatial_lon_units'])
Example 3: test_update_coords_metadata_only
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_update_coords_metadata_only(self):
ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
delete_list = ['geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lat_units', 'geospatial_lon_max',
'geospatial_lon_min', 'geospatial_lon_units', 'time_coverage_end', 'time_coverage_start']
    # Snapshot the keys so deletions don't interfere with iteration.
    for attr in list(ds1.attrs.keys()):
        if attr in delete_list:
            del ds1.attrs[attr]
edit_metadata(TEST_CUBE_ZARR_COORDS, update_coords=True, in_place=False,
output_path=TEST_CUBE_ZARR_EDIT, monitor=print)
ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
ds2 = zarr.open(TEST_CUBE_ZARR_EDIT)
for attr in delete_list:
self.assertNotIn(attr, ds1.attrs.keys())
    self.assertEqual(len(ds1), len(ds2))
self.assertIn('geospatial_lat_max', ds2.attrs.keys())
self.assertIn('geospatial_lat_min', ds2.attrs.keys())
self.assertIn('geospatial_lat_resolution', ds2.attrs.keys())
self.assertIn('geospatial_lat_units', ds2.attrs.keys())
self.assertIn('geospatial_lon_max', ds2.attrs.keys())
    self.assertEqual(180.0, ds2.attrs['geospatial_lon_max'])
    self.assertEqual(-180.0, ds2.attrs['geospatial_lon_min'])
    self.assertEqual('2010-01-04T00:00:00.000000000', ds2.attrs['time_coverage_end'])
    self.assertEqual('2010-01-01T00:00:00.000000000', ds2.attrs['time_coverage_start'])
    self.assertEqual('degrees_east', ds2.attrs['geospatial_lon_units'])
Example 4: test_write_csv
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_write_csv(tmpdir):
expected_filename = os.path.join(tmpdir, 'test.csv')
column_names = ['column_1', 'column_2', 'column_3']
expected_data = np.random.random((5, len(column_names)))
# Write csv file
io.write_csv(expected_filename, expected_data, column_names=column_names)
assert os.path.exists(expected_filename)
# Check csv file is as expected
with open(expected_filename) as output_csv:
        # Iterate the raw lines directly (the unused csv.reader call is dropped);
        # the header row is compared verbatim below.
        for row_index, row in enumerate(output_csv):
if row_index == 0:
assert row == "column_1,column_2,column_3\n"
else:
output_row_data = [float(i) for i in row.split(',')]
np.testing.assert_allclose(
np.array(output_row_data), expected_data[row_index - 1]
)
Example 5: test_zarr_multiscale
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zarr_multiscale():
viewer = ViewerModel()
multiscale = [
np.random.random((20, 20)),
np.random.random((10, 10)),
np.random.random((5, 5)),
]
with TemporaryDirectory(suffix='.zarr') as fout:
root = zarr.open_group(fout, 'a')
for i in range(len(multiscale)):
shape = 20 // 2 ** i
z = root.create_dataset(str(i), shape=(shape,) * 2)
z[:] = multiscale[i]
viewer.open(fout, multiscale=True, plugin='builtins')
assert len(viewer.layers) == 1
assert len(multiscale) == len(viewer.layers[0].data)
# Note: due to lazy loading, the next line needs to happen within
# the context manager. Alternatively, we could convert to NumPy here.
for images, images_in in zip(multiscale, viewer.layers[0].data):
np.testing.assert_array_equal(images, images_in)
Example 6: test_zarr_chunk_X
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zarr_chunk_X(tmp_path):
import zarr
zarr_pth = Path(tmp_path) / "test.zarr"
adata = gen_adata((100, 100), X_type=np.array)
adata.write_zarr(zarr_pth, chunks=(10, 10))
z = zarr.open(str(zarr_pth)) # As of v2.3.2 zarr won’t take a Path
assert z["X"].chunks == (10, 10)
from_zarr = ad.read_zarr(zarr_pth)
assert_equal(from_zarr, adata)
################################
# Round-tripping scanpy datasets
################################
Example 7: write_zarr
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def write_zarr(
store: Union[MutableMapping, str, Path],
adata: AnnData,
chunks=None,
**dataset_kwargs,
) -> None:
if isinstance(store, Path):
store = str(store)
adata.strings_to_categoricals()
if adata.raw is not None:
adata.strings_to_categoricals(adata.raw.var)
f = zarr.open(store, mode="w")
if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
write_attribute(f, "X", adata.X, dict(chunks=chunks, **dataset_kwargs))
else:
write_attribute(f, "X", adata.X, dataset_kwargs)
write_attribute(f, "obs", adata.obs, dataset_kwargs)
write_attribute(f, "var", adata.var, dataset_kwargs)
write_attribute(f, "obsm", adata.obsm, dataset_kwargs)
write_attribute(f, "varm", adata.varm, dataset_kwargs)
write_attribute(f, "obsp", adata.obsp, dataset_kwargs)
write_attribute(f, "varp", adata.varp, dataset_kwargs)
write_attribute(f, "layers", adata.layers, dataset_kwargs)
write_attribute(f, "uns", adata.uns, dataset_kwargs)
write_attribute(f, "raw", adata.raw, dataset_kwargs)
Example 8: read_series
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def read_series(dataset: zarr.Array) -> Union[np.ndarray, pd.Categorical]:
if "categories" in dataset.attrs:
categories = dataset.attrs["categories"]
if isinstance(categories, str):
categories_key = categories
            # str.rstrip strips a character set, not a suffix; slice the
            # basename off the end of the path instead.
            parent_name = dataset.name[: -len(dataset.basename)]
            parent = zarr.open(dataset.store)[parent_name]
categories_dset = parent[categories_key]
categories = categories_dset[...]
ordered = categories_dset.attrs.get("ordered", False)
else:
# TODO: remove this code at some point post 0.7
# TODO: Add tests for this
warn(
f"Your file {str(dataset.file.name)!r} has invalid categorical "
"encodings due to being written from a development version of "
"AnnData. Rewrite the file ensure you can read it in the future.",
FutureWarning,
)
return pd.Categorical.from_codes(dataset[...], categories, ordered=ordered)
else:
return dataset[...]
Example 9: init_pretrained_wemb
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def init_pretrained_wemb(self, emb_dim):
"""
From blog.keras.io
Initialises words embeddings with pre-trained GLOVE embeddings
"""
embeddings_index = {}
f = open(os.path.join(self.data_dir, 'glove.6B.') +
str(emb_dim) + 'd.txt')
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype=np.float32)
embeddings_index[word] = coefs
f.close()
embedding_mat = np.zeros((self.q_words, emb_dim), dtype=np.float32)
for word, i in self.q_wtoi.items():
embedding_v = embeddings_index.get(word)
if embedding_v is not None:
embedding_mat[i] = embedding_v
self.pretrained_wemb = embedding_mat
Example 10: set_platform
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def set_platform(self, platform_dict):
"""Set the Platform group in the AZFP nc file. AZFP does not record pitch, roll, and heave.
Parameters
----------
platform_dict
dictionary containing platform parameters
"""
if not os.path.exists(self.file_path):
print("netCDF file does not exist, exiting without saving Platform group...")
    elif self.format == '.nc':
        # Open the file once; the original code opened it a second time
        # inside the block and leaked the first handle.
        with netCDF4.Dataset(self.file_path, 'a', format='NETCDF4') as ncfile:
            plat = ncfile.createGroup('Platform')
            for k, v in platform_dict.items():
                plat.setncattr(k, v)
elif self.format == '.zarr' and not self.append_zarr: # Do not save platform if appending
zarrfile = zarr.open(self.file_path, mode='a')
plat = zarrfile.create_group('Platform')
for k, v in platform_dict.items():
plat.attrs[k] = v
Example 11: _open_readonly
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def _open_readonly(self):
if self.path is not None:
store = self._open_lmbd_readonly()
else:
# This happens when we finalise an in-memory container.
store = self.data.store
self.data = zarr.open(store=store, mode="r")
self._check_format()
self._mode = self.READ_MODE
Example 12: load
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def load(cls, path):
# Try to read the file. This should raise the correct error if we have a
# directory, missing file, permissions, etc.
with open(path, "r"):
pass
self = cls.__new__(cls)
self.mode = self.READ_MODE
self.path = path
self._open_readonly()
logger.info("Loaded {}".format(self.summary()))
return self
Example 13: test_zero_sequence_length
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zero_sequence_length(self):
# Mangle a sample data file to force a zero sequence length.
ts = msprime.simulate(10, mutation_rate=2, random_seed=5)
with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
filename = os.path.join(tempdir, "samples.tmp")
with tsinfer.SampleData(path=filename) as sample_data:
for var in ts.variants():
sample_data.add_site(var.site.position, var.genotypes)
store = zarr.LMDBStore(filename, subdir=False)
data = zarr.open(store=store, mode="w+")
data.attrs["sequence_length"] = 0
store.close()
sample_data = tsinfer.load(filename)
self.assertEqual(sample_data.sequence_length, 0)
self.assertRaises(ValueError, tsinfer.generate_ancestors, sample_data)
Example 14: fromzarr
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def fromzarr(path, group=None, dataset=None, chunk_size=None):
import zarr
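    # `path` may itself be a zarr.Array; recover the store path, group and
    # dataset name from it in that case.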
if isinstance(path, zarr.Array):
arr = path
if isinstance(arr.store, FSMap):
root = arr.store.root
path, dataset = root.rsplit('/', 1)
else:
path = arr.store.path
if '/' in arr.path and group is None:
group = arr.path.rsplit('/', 1)[0]
dataset = arr.basename
if not dataset:
path, dataset = path.rsplit('/', 1)
shape = arr.shape
elif isinstance(path, str):
fs = get_fs(path, None)
fs_map = FSMap(path, fs)
if group is None and dataset is None:
arr = zarr.open(fs_map)
if isinstance(arr, zarr.Array):
return fromzarr(arr, chunk_size=chunk_size)
g = zarr.group(store=fs_map)
arr = g[TensorFromZarr.get_path(group, dataset)]
shape = arr.shape
else:
        raise TypeError('`path` passed has wrong type, '
                        'expected str or zarr.Array, '
                        'got {}'.format(type(path)))
chunk_size = chunk_size if chunk_size is not None else arr.chunks
op = TensorFromZarr(filename=path, group=group, dataset=dataset,
dtype=arr.dtype)
return op(shape, chunk_size=chunk_size, order=TensorOrder(arr.order))
Example 15: tile
# Required imports: import zarr [as alias]
# Or: from zarr import open [as alias]
def tile(cls, op):
import zarr
check_chunks_unknown_shape(op.inputs, TilesError)
in_tensor = op.input
# create dataset
fs = get_fs(op.path, None)
path = op.path
if op.group is not None:
path += '/' + op.group
fs_map = FSMap(path, fs)
zarr.open(fs_map, 'w', path=op.dataset,
dtype=in_tensor.dtype, shape=in_tensor.shape,
chunks=tuple(max(ns) for ns in in_tensor.nsplits),
**op.zarr_options.todict())
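    # Cumulative split offsets tell each input chunk where to write in the
    # target zarr dataset.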
cum_nsplits = [[0] + np.cumsum(ns).tolist() for ns in in_tensor.nsplits]
out_chunks = []
for chunk in in_tensor.chunks:
chunk_op = op.copy().reset_key()
chunk_op._axis_offsets = \
tuple(cs[i] for i, cs in zip(chunk.index, cum_nsplits))
out_chunks.append(chunk_op.new_chunk([chunk], shape=(0,) * chunk.ndim,
index=chunk.index))
new_op = op.copy()
out = op.outputs[0]
nsplits = tuple((0,) * len(ns) for ns in in_tensor.nsplits)
return new_op.new_tensors(op.inputs, shape=out.shape, order=out.order,
nsplits=nsplits, chunks=out_chunks)