This article collects typical usage examples of dask.distributed in Python. If you are wondering how to use dask.distributed, or are looking for concrete examples of it in practice, the curated code examples below may help. You can also explore further usage examples of the containing module dask.
The following shows 15 code examples involving dask.distributed, sorted by popularity by default.
Example 1: dispatch
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional imports assumed by this snippet (module-level in the original project):
import functools

import dask

def dispatch(function, delayed=False, client=None):
    """
    Decide how to wrap a function for Dask depending on the options given.

    Parameters
    ----------
    function : callable
        The function that will be called.
    delayed : bool
        If True, will wrap the function in :func:`dask.delayed`.
    client : None or dask.distributed.Client
        If *delayed* is False and *client* is not None, will return a partial
        application of ``client.submit`` with the function as first argument.

    Returns
    -------
    function : callable
        The function wrapped in Dask.
    """
    if delayed:
        return dask.delayed(function)
    if client is not None:
        return functools.partial(client.submit, function)
    return function
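A brief usage sketch (not part of the original example): it assumes a trivial add function and uses a small in-process dask.distributed.Client purely for illustration.

import dask
from dask.distributed import Client

def add(a, b):
    return a + b

# Lazy wrapping: builds a dask.delayed graph, computed on demand.
lazy_add = dispatch(add, delayed=True)
print(lazy_add(1, 2).compute())        # -> 3

# Eager submission: returns a distributed Future immediately.
client = Client(processes=False)       # in-process cluster, for the demo only
submit_add = dispatch(add, client=client)
print(submit_add(1, 2).result())       # -> 3
client.close()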
Example 2: LaunchDaskDistributedClient
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

    if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

        from multiprocessing.pool import ThreadPool
        try:
            import dask
            from dask.distributed import Client, LocalCluster
        except ImportError:
            raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

        dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

        # INITIALISE CLUSTER
        if scheduler_ip is None:
            cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False,
                                   threads_per_worker=None)
            client = Client(cluster)
        else:
            client = Client(scheduler_ip)

        self.dask_client = client
        self.is_dask_scheduler_initialised = True
Example 3: dask_client_create
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional imports assumed by this snippet (module-level in the original project):
import logging
import os

import dask
from dask.distributed import Client

def dask_client_create(**kwargs):
    """
    Create a Dask client object. The function is trivial and is introduced so that
    the Dask client is created in a uniform way throughout the program.

    Parameters
    ----------
    kwargs: dict, optional
        kwargs are passed to the Dask client constructor

    Returns
    -------
    client: dask.distributed.Client
        Dask client object
    """
    _kwargs = {"processes": True, "silence_logs": logging.ERROR}
    _kwargs.update(kwargs)

    client = Client(**_kwargs)

    dask.config.set(shuffle="disk")
    path_dask_data = os.path.expanduser("~/.dask")
    dask.config.set({"temporary_directory": path_dask_data})

    return client
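A short usage sketch (not part of the original example); any keyword arguments are forwarded directly to dask.distributed.Client:

# Override the defaults where needed, e.g. a small two-worker local cluster.
client = dask_client_create(n_workers=2, threads_per_worker=1)
print(len(client.scheduler_info()["workers"]))  # -> 2
client.close()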
Example 4: _submit_calcs_on_client
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional imports assumed by this snippet (module-level in the original project):
import logging
from distutils.version import LooseVersion

import dask
import dask.bag as db

def _submit_calcs_on_client(calcs, client, func):
    """Submit calculations via dask.bag and a distributed client."""
    logging.info('Connected to client: {}'.format(client))
    if LooseVersion(dask.__version__) < '0.18':
        dask_option_setter = dask.set_options
    else:
        dask_option_setter = dask.config.set
    with dask_option_setter(get=client.get):
        return db.from_sequence(calcs).map(func).compute()
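For reference, a minimal sketch of the same dask.bag pattern on current Dask versions, with toy stand-ins for the aospy Calc objects (hypothetical values):

import dask.bag as db
from dask.distributed import Client

client = Client(processes=False)
calcs = [1, 2, 3, 4]              # stand-ins for aospy.Calc objects
func = lambda x: x * x            # stand-in for the per-calc compute function
print(db.from_sequence(calcs).map(func).compute(scheduler=client))  # [1, 4, 9, 16]
client.close()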
Example 5: _exec_calcs
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if parallelize:
        def func(calc):
            """Wrap _compute_or_skip_on_error to require only the calc
            argument"""
            if 'write_to_tar' in compute_kwargs:
                compute_kwargs['write_to_tar'] = False
            return _compute_or_skip_on_error(calc, compute_kwargs)

        if client is None:
            n_workers = _n_workers_for_local_cluster(calcs)
            with distributed.LocalCluster(n_workers=n_workers) as cluster:
                with distributed.Client(cluster) as client:
                    result = _submit_calcs_on_client(calcs, client, func)
        else:
            result = _submit_calcs_on_client(calcs, client, func)
        if compute_kwargs['write_to_tar']:
            _serial_write_to_tar(calcs)
        return result
    else:
        return [_compute_or_skip_on_error(calc, compute_kwargs)
                for calc in calcs]
Example 6: cli
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def cli(tmpdir):
    import dask.distributed

    client = dask.distributed.Client(n_workers=1)

    def setup():
        m = fsspec.filesystem("memory")
        with m.open("afile", "wb") as f:
            f.write(b"data")

    client.run(setup)
    try:
        yield client
    finally:
        client.close()
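A hypothetical test built on this fixture (assuming it is registered with @pytest.fixture, which the excerpt does not show): the file written during setup lives in the worker's in-memory filesystem, so it is read back via client.run.

def test_memory_file_visible_on_worker(cli):
    def read():
        import fsspec
        m = fsspec.filesystem("memory")
        with m.open("afile", "rb") as f:
            return f.read()

    out = cli.run(read)                      # {worker_address: result}
    assert list(out.values()) == [b"data"]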
Example 7: build_dict
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def build_dict(self, pipeline_name):
    '''Returns a dict we can use for kwargs passed to dask client instantiation.

    Intended to be used like:

        with dask.distributed.Client(**cfg.build_dict()) as client:
            << use client here >>
    '''
    if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
        dask_cfg = {'name': pipeline_name}
    else:
        dask_cfg = {}

    if self.cluster_configuration:
        for k, v in self.cluster_configuration.items():
            dask_cfg[k] = v

    # if address is set, don't add LocalCluster args
    # context: https://github.com/dask/distributed/issues/3313
    if (self.cluster_type == 'local') and ('address' not in dask_cfg):
        # We set threads_per_worker because Dagster is not thread-safe. Even though
        # processes=True by default, there is a clever piece of machinery
        # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes execution
        # multithreaded by default when the number of available cores is greater than 4.
        # See: https://github.com/dagster-io/dagster/issues/2181
        # We may want to try to figure out a way to enforce this on remote Dask clusters against
        # which users run Dagster workloads.
        dask_cfg['threads_per_worker'] = 1

    return dask_cfg
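A hedged sketch of the intended call pattern; cfg stands for an instance of the (hypothetical here) configuration class that defines cluster_type, cluster_configuration and build_dict:

import dask.distributed

with dask.distributed.Client(**cfg.build_dict("my_pipeline")) as client:
    futures = client.map(lambda x: x + 1, range(10))
    print(client.gather(futures))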
Example 8: scale
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def scale(self, n):
    # A shim to maintain backward compatibility
    # https://github.com/dask/distributed/issues/3054
    maximum = dask.config.get("kubernetes.count.max")
    if maximum is not None and maximum < n:
        logger.info(
            "Tried to scale beyond maximum number of workers %d > %d", n, maximum
        )
        n = maximum
    return super().scale(n)
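The shim reads the worker ceiling from the Dask configuration; a minimal illustration of setting it (the value 10 is arbitrary):

import dask

dask.config.set({"kubernetes.count.max": 10})
print(dask.config.get("kubernetes.count.max"))  # -> 10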
Example 9: test_logs
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
async def test_logs(remote_cluster):
    cluster = remote_cluster
    cluster.scale(2)
    await cluster

    start = time()
    while len(cluster.scheduler_info["workers"]) < 2:
        await asyncio.sleep(0.1)
        assert time() < start + 20

    logs = await cluster.logs()
    assert len(logs) == 3
    for _, log in logs.items():
        assert "distributed.scheduler" in log or "distributed.worker" in log
Example 10: test_diagnostics_link_env_variable
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
async def test_diagnostics_link_env_variable(pod_spec, ns):
    pytest.importorskip("bokeh")

    with dask.config.set({"distributed.dashboard.link": "foo-{USER}-{port}"}):
        async with KubeCluster(pod_spec, namespace=ns, asynchronous=True) as cluster:
            port = cluster.scheduler_info["services"]["dashboard"]

            assert (
                "foo-" + getpass.getuser() + "-" + str(port) in cluster.dashboard_link
            )
Example 11: client
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def client():
    from dask.distributed import Client

    with Client(n_workers=4) as dask_client:
        yield dask_client
Example 12: wait_and_display_progress
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional imports assumed by this snippet (module-level in the original project):
import time as ttime

from dask.distributed import wait

def wait_and_display_progress(fut, progress_bar=None):
    """
    Wait for the future to complete and display the progress bar.

    This function may be used to drive any custom progress bar that
    displays progress in percent from 0 to 100.

    Parameters
    ----------
    fut: dask future
        future object for the batch of tasks submitted to the distributed
        client.
    progress_bar: callable or None
        callable function or callable object with methods `start()`,
        `__call__(float)` and `finish()`. The methods `start()` and
        `finish()` are optional. For example, this could be a reference
        to an instance of the object `TerminalProgressBar`.

    Examples
    --------
    .. code-block::

        client = Client()
        data = da.random.random(size=(100, 100), chunks=(10, 10))
        sm_fut = da.sum(data, axis=0).persist(scheduler=client)

        # Call the progress monitor
        wait_and_display_progress(sm_fut, TerminalProgressBar("Monitoring progress: "))

        sm = sm_fut.compute(scheduler=client)
        client.close()
    """
    # If there is no progress bar, then just return without waiting for the future
    if progress_bar is None:
        return

    if hasattr(progress_bar, "start"):
        progress_bar.start()
    progress_bar(1.0)

    while True:
        done, not_done = wait(fut, return_when='FIRST_COMPLETED')
        n_completed, n_pending = len(done), len(not_done)
        n_total = n_completed + n_pending
        percent_completed = n_completed / n_total * 100.0 if n_total > 0 else 100.0

        # It is guaranteed that 'progress_bar' is called for 100% completion
        progress_bar(percent_completed)
        if not n_pending:
            break
        ttime.sleep(0.5)

    if hasattr(progress_bar, "finish"):
        progress_bar.finish()
Example 13: _chunk_numpy_array
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional imports assumed by this snippet (module-level in the original project):
import math

import dask.array as da

def _chunk_numpy_array(data, chunk_size):
    """
    Convert a numpy array into a Dask array with chunks of the given size. The function
    splits the array into chunks along axes 0 and 1. If the array has more than 2 dimensions,
    then the remaining dimensions are not chunked. Note that
    `dask_array = da.array(data, chunks=...)` will set the chunk size, but will not split the
    data into chunks, therefore the array cannot be loaded block by block by workers
    controlled by a distributed scheduler.

    Parameters
    ----------
    data: ndarray(float), 2 or more dimensions
        XRF map of the shape `(ny, nx, ne)`, where `ny` and `nx` represent the image size
        and `ne` is the number of points in spectra
    chunk_size: tuple(int, int) or list(int, int)
        Chunk size for axes 0 and 1: `(chunk_y, chunk_x)`. The function will accept
        chunk size values that are larger than the respective `data` array dimensions.

    Returns
    -------
    data_dask: dask.array
        Dask array with the given chunk size
    """
    chunk_y, chunk_x = chunk_size
    ny, nx = data.shape[0:2]
    chunk_y, chunk_x = min(chunk_y, ny), min(chunk_x, nx)

    def _get_slice(n1, n2):
        data_slice = data[slice(n1 * chunk_y, min(n1 * chunk_y + chunk_y, ny)),
                          slice(n2 * chunk_x, min(n2 * chunk_x + chunk_x, nx))]
        # Wrap the slice into a list with appropriate dimensions
        for _ in range(2, data.ndim):
            data_slice = [data_slice]
        return data_slice

    # Chunk the numpy array and assemble it as a dask array
    data_dask = da.block([
        [
            _get_slice(_1, _2)
            for _2 in range(int(math.ceil(nx / chunk_x)))
        ]
        for _1 in range(int(math.ceil(ny / chunk_y)))
    ])

    return data_dask
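A quick illustration with random data of the documented shape (dimensions chosen arbitrarily):

import numpy as np

data = np.random.rand(50, 40, 1000)          # (ny, nx, ne) XRF-like map
data_dask = _chunk_numpy_array(data, (16, 16))
print(data_dask.shape)                       # (50, 40, 1000)
print(data_dask.chunks[0:2])                 # ((16, 16, 16, 2), (16, 16, 8))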
Example 14: _fit_xrf_block
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
# Additional import assumed by this snippet (module-level in the original project):
import numpy as np

def _fit_xrf_block(data, data_sel_indices,
                   matv, snip_param, use_snip):
    """
    Spectrum fitting for a block of an XRF dataset. The function is intended to be
    called using the `map_blocks` function for parallel processing with the Dask
    distributed package.

    Parameters
    ----------
    data : ndarray
        block of an XRF dataset. Shape=(ny, nx, ne).
    data_sel_indices: tuple
        tuple `(n_start, n_end)` which defines the indices along axis 2 of the `data` array
        that are used for fitting. Note that `ne` (in `data`) and `ne_model` (in `matv`)
        are not equal. But `n_end - n_start` MUST be equal to `ne_model`! Indices
        `n_start .. n_end - 1` will be selected from each pixel.
    matv: ndarray
        Matrix of spectra of the selected elements (emission lines). Shape=(ne_model, n_lines)
    snip_param: dict
        Dictionary of parameters forwarded to the 'snip' method for background removal.
        Keys: `e_offset`, `e_linear`, `e_quadratic` (parameters of the energy axis approximation),
        `b_width` (width of the window that defines the resolution of the snip algorithm).
    use_snip: bool, optional
        enable/disable background removal using the snip algorithm

    Returns
    -------
    data_out: ndarray
        array with fitting results. Shape: `(ny, nx, n_lines + 4)`. For each pixel
        the output data contains: `n_lines` values that represent the area under the emission
        line spectra; background area (only in the selected energy range), error (R-factor),
        total count in the selected energy range, total count of the full experimental spectrum.
    """
    spec = data
    spec_sel = spec[:, :, data_sel_indices[0]: data_sel_indices[1]]

    if use_snip:
        bg_sel = np.apply_along_axis(snip_method_numba, 2, spec_sel,
                                     snip_param['e_offset'],
                                     snip_param['e_linear'],
                                     snip_param['e_quadratic'],
                                     width=snip_param['b_width'])
        y = spec_sel - bg_sel
        bg_sum = np.sum(bg_sel, axis=2)
    else:
        y = spec_sel
        bg_sum = np.zeros(shape=data.shape[0:2])

    weights, rfactor, _ = fit_spectrum(y, matv, axis=2, method="nnls")

    total_cnt = np.sum(spec, axis=2)
    sel_cnt = np.sum(spec_sel, axis=2)

    # Stack depth-wise (along axis 2)
    data_out = np.dstack((weights, bg_sum, rfactor, sel_cnt, total_cnt))

    return data_out
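A sketch of how such a block function is typically driven through dask.array.map_blocks; the dimensions and the random matv below are placeholders, and running it requires the surrounding pyxrf module that provides fit_spectrum:

import numpy as np
import dask.array as da

ny, nx, ne, n_lines = 50, 40, 1000, 5        # toy dimensions (hypothetical)
n_start, n_end = 100, 600                    # ne_model = 500
data_dask = da.random.random((ny, nx, ne), chunks=(16, 16, ne))
matv = np.random.rand(n_end - n_start, n_lines)   # placeholder fitting matrix

res = data_dask.map_blocks(
    _fit_xrf_block,
    data_sel_indices=(n_start, n_end),
    matv=matv,
    snip_param={},                           # unused when use_snip=False
    use_snip=False,
    dtype="float64",
    chunks=(data_dask.chunks[0], data_dask.chunks[1], (n_lines + 4,)),
)
print(res.compute().shape)                   # (50, 40, 9)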
Example 15: createSystemFromIUPAC
# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def createSystemFromIUPAC(iupac_name):
    """
    Create an OpenMM system from an IUPAC name.

    Parameters
    ----------
    iupac_name : str
        IUPAC name

    Returns
    -------
    molecule : openeye.OEMol
        OEMol molecule
    system : openmm.System object
        OpenMM system
    positions : [n,3] np.array of floats
        Positions
    topology : openmm.app.Topology object
        Topology
    """
    from io import StringIO  # assumed: StringIO is imported at module level in the original source
    from perses.utils.data import get_data_filename
    from perses.utils.openeye import extractPositionsFromOEMol

    # Create OEMol
    molecule = iupac_to_oemol(iupac_name)

    # Generate a topology.
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    topology = generateTopologyFromOEMol(molecule)

    # Initialize a forcefield with GAFF.
    # TODO: Fix path for `gaff.xml` since it is not yet distributed with OpenMM
    from simtk.openmm.app import ForceField
    gaff_xml_filename = get_data_filename('data/gaff.xml')
    forcefield = ForceField(gaff_xml_filename)

    # Generate template and parameters.
    from openmoltools.forcefield_generators import generateResidueTemplate
    [template, ffxml] = generateResidueTemplate(molecule)

    # Register the template.
    forcefield.registerResidueTemplate(template)

    # Add the parameters.
    forcefield.loadFile(StringIO(ffxml))

    # Create the system.
    system = forcefield.createSystem(topology, removeCMMotion=False)

    # Extract positions
    positions = extractPositionsFromOEMol(molecule)

    return (molecule, system, positions, topology)
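A hypothetical call (it requires the OpenEye toolkits, openmoltools and perses to be installed and licensed; "ethanol" is just an example input):

molecule, system, positions, topology = createSystemFromIUPAC("ethanol")
print(molecule.NumAtoms(), system.getNumParticles())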