This article collects typical usage examples of the zarr.open method in Python. If you have been wondering what exactly zarr.open does, how to call it, or what real zarr.open usage looks like, the hand-picked code examples below may help. You can also explore further usage examples for the zarr module, where this method lives.
The sections below present 15 code examples of the zarr.open method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
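Before diving into the examples, here is a minimal, self-contained sketch of what zarr.open does (the file name example.zarr is illustrative): depending on the mode and on what already exists at the given location, it returns a zarr.Array or a zarr.Group backed by the store.

import numpy as np
import zarr

# Open (creating if necessary) a chunked on-disk array; mode='a' means read/write.
z = zarr.open('example.zarr', mode='a', shape=(100, 100), chunks=(10, 10), dtype='f8')
z[:] = np.random.random((100, 100))  # writes go straight through to the store

# Re-open read-only; an Array comes back because one already exists at this path.
z2 = zarr.open('example.zarr', mode='r')
print(z2.shape, z2.chunks)  # (100, 100) (10, 10)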
Example 1: finalise
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def finalise(self):
    """
    Ensures that the state of the data is flushed and writes the
    provenance for the current operation.
    """
    self._check_write_modes()
    self.data.attrs[FINALISED_KEY] = True
    if self.path is not None:
        store = self.data.store
        store.close()
        logger.debug("Fixing up LMDB file size")
        with lmdb.open(self.path, subdir=False, lock=False, writemap=True) as db:
            # LMDB maps a very large amount of space by default. While this
            # doesn't do any harm, it's annoying because we can't use ls to
            # see the file sizes and the amount of RAM we're mapping can
            # look like it's very large. So, we fix this up so that the
            # map size is equal to the number of pages in use.
            num_pages = db.info()["last_pgno"]
            page_size = db.stat()["psize"]
            db.set_mapsize(num_pages * page_size)
        # Remove the lock file as we don't need it after this point.
        remove_lmdb_lockfile(self.path)
    self._open_readonly()
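For context, this method belongs to a container whose zarr hierarchy lives in a single LMDB file. A minimal sketch of that pairing, using a hypothetical file name (zarr.LMDBStore forwards extra keyword arguments such as subdir=False to lmdb.open):

import zarr

store = zarr.LMDBStore('samples.lmdb', subdir=False)  # one LMDB file, not a directory
root = zarr.open(store=store, mode='a')               # root group backed by LMDB
root.attrs['finalised'] = True
store.close()  # flush to disk before fixing up the map size as above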
Example 2: test_update_coords_only
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_update_coords_only(self):
    ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
    delete_list = ['geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lat_units',
                   'geospatial_lon_max', 'geospatial_lon_min', 'geospatial_lon_units',
                   'time_coverage_end', 'time_coverage_start']
    for attr in delete_list:
        if attr in ds1.attrs:
            del ds1.attrs[attr]
    result = self.invoke_cli(['edit', TEST_CUBE_ZARR_COORDS, '-o', TEST_CUBE_ZARR_EDIT, '-C'])
    self.assertEqual(0, result.exit_code)
    ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
    ds2 = zarr.open(TEST_CUBE_ZARR_EDIT)
    for attr in delete_list:
        self.assertNotIn(attr, ds1.attrs.keys())
    self.assertEqual(len(ds1), len(ds2))
    self.assertIn('geospatial_lat_max', ds2.attrs.keys())
    self.assertIn('geospatial_lat_min', ds2.attrs.keys())
    self.assertIn('geospatial_lat_resolution', ds2.attrs.keys())
    self.assertIn('geospatial_lat_units', ds2.attrs.keys())
    self.assertIn('geospatial_lon_max', ds2.attrs.keys())
    self.assertEqual(180.0, ds2.attrs['geospatial_lon_max'])
    self.assertEqual(-180.0, ds2.attrs['geospatial_lon_min'])
    self.assertEqual('2010-01-04T00:00:00.000000000', ds2.attrs['time_coverage_end'])
    self.assertEqual('2010-01-01T00:00:00.000000000', ds2.attrs['time_coverage_start'])
    self.assertEqual('degrees_east', ds2.attrs['geospatial_lon_units'])
Example 3: test_update_coords_metadata_only
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_update_coords_metadata_only(self):
    ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
    delete_list = ['geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lat_units',
                   'geospatial_lon_max', 'geospatial_lon_min', 'geospatial_lon_units',
                   'time_coverage_end', 'time_coverage_start']
    for attr in delete_list:
        if attr in ds1.attrs:
            del ds1.attrs[attr]
    edit_metadata(TEST_CUBE_ZARR_COORDS, update_coords=True, in_place=False,
                  output_path=TEST_CUBE_ZARR_EDIT, monitor=print)
    ds1 = zarr.open(TEST_CUBE_ZARR_COORDS)
    ds2 = zarr.open(TEST_CUBE_ZARR_EDIT)
    for attr in delete_list:
        self.assertNotIn(attr, ds1.attrs.keys())
    self.assertEqual(len(ds1), len(ds2))
    self.assertIn('geospatial_lat_max', ds2.attrs.keys())
    self.assertIn('geospatial_lat_min', ds2.attrs.keys())
    self.assertIn('geospatial_lat_resolution', ds2.attrs.keys())
    self.assertIn('geospatial_lat_units', ds2.attrs.keys())
    self.assertIn('geospatial_lon_max', ds2.attrs.keys())
    self.assertEqual(180.0, ds2.attrs['geospatial_lon_max'])
    self.assertEqual(-180.0, ds2.attrs['geospatial_lon_min'])
    self.assertEqual('2010-01-04T00:00:00.000000000', ds2.attrs['time_coverage_end'])
    self.assertEqual('2010-01-01T00:00:00.000000000', ds2.attrs['time_coverage_start'])
    self.assertEqual('degrees_east', ds2.attrs['geospatial_lon_units'])
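Both tests lean on the fact that a zarr group's .attrs behaves like an ordinary mutable mapping persisted as JSON alongside the data. A minimal sketch, with an illustrative path:

import zarr

ds = zarr.open('cube.zarr')  # default mode 'a': read/write, created if missing
ds.attrs['time_coverage_start'] = '2010-01-01T00:00:00.000000000'
del ds.attrs['time_coverage_start']       # attrs supports the full MutableMapping API
print('time_coverage_start' in ds.attrs)  # False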
Example 4: test_write_csv
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_write_csv(tmpdir):
    expected_filename = os.path.join(tmpdir, 'test.csv')
    column_names = ['column_1', 'column_2', 'column_3']
    expected_data = np.random.random((5, len(column_names)))
    # Write csv file
    io.write_csv(expected_filename, expected_data, column_names=column_names)
    assert os.path.exists(expected_filename)
    # Check csv file is as expected: header first, then one line per data row
    with open(expected_filename) as output_csv:
        for row_index, row in enumerate(output_csv):
            if row_index == 0:
                assert row == "column_1,column_2,column_3\n"
            else:
                output_row_data = [float(i) for i in row.split(',')]
                np.testing.assert_allclose(
                    np.array(output_row_data), expected_data[row_index - 1]
                )
Example 5: test_zarr_multiscale
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zarr_multiscale():
    viewer = ViewerModel()
    multiscale = [
        np.random.random((20, 20)),
        np.random.random((10, 10)),
        np.random.random((5, 5)),
    ]
    with TemporaryDirectory(suffix='.zarr') as fout:
        root = zarr.open_group(fout, 'a')
        for i in range(len(multiscale)):
            shape = 20 // 2 ** i
            z = root.create_dataset(str(i), shape=(shape,) * 2)
            z[:] = multiscale[i]
        viewer.open(fout, multiscale=True, plugin='builtins')
        assert len(viewer.layers) == 1
        assert len(multiscale) == len(viewer.layers[0].data)
        # Note: due to lazy loading, the next line needs to happen within
        # the context manager. Alternatively, we could convert to NumPy here.
        for images, images_in in zip(multiscale, viewer.layers[0].data):
            np.testing.assert_array_equal(images, images_in)
Example 6: test_zarr_chunk_X
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zarr_chunk_X(tmp_path):
    import zarr

    zarr_pth = Path(tmp_path) / "test.zarr"
    adata = gen_adata((100, 100), X_type=np.array)
    adata.write_zarr(zarr_pth, chunks=(10, 10))

    z = zarr.open(str(zarr_pth))  # As of v2.3.2 zarr won't take a Path
    assert z["X"].chunks == (10, 10)
    from_zarr = ad.read_zarr(zarr_pth)
    assert_equal(from_zarr, adata)


################################
# Round-tripping scanpy datasets
################################
Example 7: write_zarr
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def write_zarr(
    store: Union[MutableMapping, str, Path],
    adata: AnnData,
    chunks=None,
    **dataset_kwargs,
) -> None:
    if isinstance(store, Path):
        store = str(store)
    adata.strings_to_categoricals()
    if adata.raw is not None:
        adata.strings_to_categoricals(adata.raw.var)
    f = zarr.open(store, mode="w")
    if chunks is not None and not isinstance(adata.X, sparse.spmatrix):
        write_attribute(f, "X", adata.X, dict(chunks=chunks, **dataset_kwargs))
    else:
        write_attribute(f, "X", adata.X, dataset_kwargs)
    write_attribute(f, "obs", adata.obs, dataset_kwargs)
    write_attribute(f, "var", adata.var, dataset_kwargs)
    write_attribute(f, "obsm", adata.obsm, dataset_kwargs)
    write_attribute(f, "varm", adata.varm, dataset_kwargs)
    write_attribute(f, "obsp", adata.obsp, dataset_kwargs)
    write_attribute(f, "varp", adata.varp, dataset_kwargs)
    write_attribute(f, "layers", adata.layers, dataset_kwargs)
    write_attribute(f, "uns", adata.uns, dataset_kwargs)
    write_attribute(f, "raw", adata.raw, dataset_kwargs)
Example 8: read_series
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def read_series(dataset: zarr.Array) -> Union[np.ndarray, pd.Categorical]:
    if "categories" in dataset.attrs:
        categories = dataset.attrs["categories"]
        if isinstance(categories, str):
            categories_key = categories
            parent_name = dataset.name.rstrip(dataset.basename)
            parent = zarr.open(dataset.store)[parent_name]
            categories_dset = parent[categories_key]
            categories = categories_dset[...]
            ordered = categories_dset.attrs.get("ordered", False)
        else:
            # TODO: remove this code at some point post 0.7
            # TODO: Add tests for this
            warn(
                f"Your file {str(dataset.file.name)!r} has invalid categorical "
                "encodings due to being written from a development version of "
                "AnnData. Rewrite the file to ensure you can read it in the future.",
                FutureWarning,
            )
            ordered = False  # fall back to unordered categories for legacy files
        return pd.Categorical.from_codes(dataset[...], categories, ordered=ordered)
    else:
        return dataset[...]
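The first branch above decodes a categorical column: the array holds integer codes, and its attrs point at a sibling array holding the category labels. A minimal sketch of that on-disk layout (the group and array names are illustrative, not necessarily AnnData's exact conventions):

import zarr

g = zarr.open('frame.zarr', mode='w')  # illustrative path
g.create_dataset('__categories/cell_type', data=['B', 'T'], dtype=str)
codes = g.create_dataset('cell_type', data=[0, 1, 1, 0])
codes.attrs['categories'] = '__categories/cell_type'  # string attr -> key under the parent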
Example 9: init_pretrained_wemb
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def init_pretrained_wemb(self, emb_dim):
    """
    From blog.keras.io
    Initialises word embeddings with pre-trained GloVe embeddings.
    """
    embeddings_index = {}
    with open(os.path.join(self.data_dir, f'glove.6B.{emb_dim}d.txt')) as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype=np.float32)
            embeddings_index[word] = coefs
    embedding_mat = np.zeros((self.q_words, emb_dim), dtype=np.float32)
    for word, i in self.q_wtoi.items():
        embedding_v = embeddings_index.get(word)
        if embedding_v is not None:
            embedding_mat[i] = embedding_v
    self.pretrained_wemb = embedding_mat
Example 10: set_platform
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def set_platform(self, platform_dict):
    """Set the Platform group in the AZFP nc file. AZFP does not record pitch, roll, and heave.

    Parameters
    ----------
    platform_dict
        dictionary containing platform parameters
    """
    if not os.path.exists(self.file_path):
        print("netCDF file does not exist, exiting without saving Platform group...")
    elif self.format == '.nc':
        with netCDF4.Dataset(self.file_path, 'a', format='NETCDF4') as ncfile:
            plat = ncfile.createGroup('Platform')
            for k, v in platform_dict.items():
                plat.setncattr(k, v)
    elif self.format == '.zarr' and not self.append_zarr:  # Do not save platform if appending
        zarrfile = zarr.open(self.file_path, mode='a')
        plat = zarrfile.create_group('Platform')
        for k, v in platform_dict.items():
            plat.attrs[k] = v
Example 11: _open_readonly
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def _open_readonly(self):
    if self.path is not None:
        store = self._open_lmbd_readonly()
    else:
        # This happens when we finalise an in-memory container.
        store = self.data.store
    self.data = zarr.open(store=store, mode="r")
    self._check_format()
    self._mode = self.READ_MODE
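The store= form used here accepts any zarr store object, not just a filesystem path; a minimal sketch with an in-memory store:

import zarr

store = zarr.MemoryStore()
z = zarr.open(store=store, mode='a', shape=(4,), dtype='i4')  # array living in memory
z[:] = [1, 2, 3, 4]
ro = zarr.open(store=store, mode='r')  # same store, reopened read-only
print(ro[:])  # [1 2 3 4]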
Example 12: load
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
@classmethod
def load(cls, path):
    # Try to read the file. This should raise the correct error if we have a
    # directory, missing file, permissions, etc.
    with open(path, "r"):
        pass
    self = cls.__new__(cls)
    self.mode = self.READ_MODE
    self.path = path
    self._open_readonly()
    logger.info("Loaded {}".format(self.summary()))
    return self
Example 13: test_zero_sequence_length
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def test_zero_sequence_length(self):
    # Mangle a sample data file to force a zero sequence length.
    ts = msprime.simulate(10, mutation_rate=2, random_seed=5)
    with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
        filename = os.path.join(tempdir, "samples.tmp")
        with tsinfer.SampleData(path=filename) as sample_data:
            for var in ts.variants():
                sample_data.add_site(var.site.position, var.genotypes)
        store = zarr.LMDBStore(filename, subdir=False)
        data = zarr.open(store=store, mode="w+")
        data.attrs["sequence_length"] = 0
        store.close()
        sample_data = tsinfer.load(filename)
        self.assertEqual(sample_data.sequence_length, 0)
        self.assertRaises(ValueError, tsinfer.generate_ancestors, sample_data)
Example 14: fromzarr
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
def fromzarr(path, group=None, dataset=None, chunk_size=None):
    import zarr

    if isinstance(path, zarr.Array):
        arr = path
        if isinstance(arr.store, FSMap):
            root = arr.store.root
            path, dataset = root.rsplit('/', 1)
        else:
            path = arr.store.path
            if '/' in arr.path and group is None:
                group = arr.path.rsplit('/', 1)[0]
            dataset = arr.basename
            if not dataset:
                path, dataset = path.rsplit('/', 1)
        shape = arr.shape
    elif isinstance(path, str):
        fs = get_fs(path, None)
        fs_map = FSMap(path, fs)
        if group is None and dataset is None:
            arr = zarr.open(fs_map)
            if isinstance(arr, zarr.Array):
                return fromzarr(arr, chunk_size=chunk_size)
        g = zarr.group(store=fs_map)
        arr = g[TensorFromZarr.get_path(group, dataset)]
        shape = arr.shape
    else:
        raise TypeError('`path` passed has wrong type, '
                        'expect str or zarr.Array, '
                        'got {}'.format(type(path)))
    chunk_size = chunk_size if chunk_size is not None else arr.chunks
    op = TensorFromZarr(filename=path, group=group, dataset=dataset,
                        dtype=arr.dtype)
    return op(shape, chunk_size=chunk_size, order=TensorOrder(arr.order))
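A usage sketch for fromzarr, assuming mars is installed and a zarr array already exists at the (illustrative) path:

import mars.tensor as mt

t = mt.fromzarr('data.zarr', dataset='x')  # lazily wraps the on-disk array
print(t.execute())                         # materialises it through the mars scheduler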
Example 15: tile
# Required import: import zarr [as alias]
# Or: from zarr import open [as alias]
@classmethod
def tile(cls, op):
    import zarr

    check_chunks_unknown_shape(op.inputs, TilesError)

    in_tensor = op.input
    # create dataset
    fs = get_fs(op.path, None)
    path = op.path
    if op.group is not None:
        path += '/' + op.group
    fs_map = FSMap(path, fs)
    zarr.open(fs_map, 'w', path=op.dataset,
              dtype=in_tensor.dtype, shape=in_tensor.shape,
              chunks=tuple(max(ns) for ns in in_tensor.nsplits),
              **op.zarr_options.todict())

    cum_nsplits = [[0] + np.cumsum(ns).tolist() for ns in in_tensor.nsplits]
    out_chunks = []
    for chunk in in_tensor.chunks:
        chunk_op = op.copy().reset_key()
        chunk_op._axis_offsets = \
            tuple(cs[i] for i, cs in zip(chunk.index, cum_nsplits))
        out_chunks.append(chunk_op.new_chunk([chunk], shape=(0,) * chunk.ndim,
                                             index=chunk.index))

    new_op = op.copy()
    out = op.outputs[0]
    nsplits = tuple((0,) * len(ns) for ns in in_tensor.nsplits)
    return new_op.new_tensors(op.inputs, shape=out.shape, order=out.order,
                              nsplits=nsplits, chunks=out_chunks)