

Python dask.distributed usage examples

This article collects typical usage examples of the dask.distributed module in Python. If you are wondering what dask.distributed does, how to call it, or how it is used in real projects, the curated examples below may help. You can also explore further usage examples from dask, the package this module belongs to.


The following presents 15 code examples of dask.distributed, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
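Most of the examples below follow the same basic pattern: create a dask.distributed Client, submit work to it, and collect the results. A minimal sketch of that pattern (the worker count and the submitted function are arbitrary choices for illustration, not taken from any of the projects below):

from dask.distributed import Client

def square(x):
    return x ** 2

client = Client(n_workers=2)        # start a local cluster with 2 worker processes
future = client.submit(square, 10)  # schedule the call on the cluster
print(future.result())              # 100
client.close()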

Example 1: dispatch

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def dispatch(function, delayed=False, client=None):
    """
    Decide how to wrap a function for Dask depending on the options given.

    Parameters
    ----------
    function : callable
        The function that will be called.
    delayed : bool
        If True, will wrap the function in :func:`dask.delayed`.
    client : None or dask.distributed Client
        If *delayed* is False and *client* is not None, will return a partial
        execution of the ``client.submit`` with the function as first argument.

    Returns
    -------
    function : callable
        The function wrapped in Dask.

    """
    if delayed:
        return dask.delayed(function)
    if client is not None:
        return functools.partial(client.submit, function)
    return function 
Author: fatiando, Project: verde, Lines: 27, Source: utils.py
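A hypothetical usage sketch of dispatch; expensive_mean and the data/client objects below are placeholders for illustration, not part of verde:

import numpy as np
import dask
from dask.distributed import Client

def expensive_mean(values):
    return np.mean(values)

data = np.arange(1_000_000)

# Lazy execution: returns a dask.delayed object, computed on demand
lazy_result = dispatch(expensive_mean, delayed=True)(data)
print(lazy_result.compute())

# Eager execution on a distributed client: returns a Future
client = Client()
future = dispatch(expensive_mean, client=client)(data)
print(future.result())
client.close()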

Example 2: LaunchDaskDistributedClient

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

        if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

            from multiprocessing.pool import ThreadPool
            try:
                import dask
                from dask.distributed import Client, LocalCluster
            except ImportError:
                raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

            dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))
            # INITIALISE CLUSTER
            if scheduler_ip is None:
                cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
                client = Client(cluster)
            else:
                client = Client(scheduler_ip)

            self.dask_client = client

            self.is_dask_scheduler_initialised = True 
Author: romeric, Project: florence, Lines: 24, Source: FEMSolver.py

Example 3: dask_client_create

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def dask_client_create(**kwargs):
    """
    Create a Dask client object. The function is trivial and is introduced so that
    the Dask client is created in a uniform way throughout the program.

    Parameters
    ----------
    kwargs: dict, optional
        kwargs will be passed to the Dask client constructor

    Returns
    -------
    client: dask.distributed.Client
        Dask client object
    """
    _kwargs = {"processes": True, "silence_logs": logging.ERROR}
    _kwargs.update(kwargs)
    client = Client(**_kwargs)
    dask.config.set(shuffle="disk")
    path_dask_data = os.path.expanduser("~/.dask")
    dask.config.set({"temporary_directory": path_dask_data})
    return client 
Author: NSLS-II, Project: PyXRF, Lines: 24, Source: map_processing.py
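A usage sketch for dask_client_create, assuming Client, dask, logging and os are imported as in the source module:

client = dask_client_create()          # default configuration used throughout the program
client.close()

# Any Client constructor argument can be overridden via kwargs
client = dask_client_create(n_workers=4, threads_per_worker=1)
client.close()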

Example 4: _submit_calcs_on_client

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def _submit_calcs_on_client(calcs, client, func):
    """Submit calculations via dask.bag and a distributed client"""
    logging.info('Connected to client: {}'.format(client))
    if LooseVersion(dask.__version__) < '0.18':
        dask_option_setter = dask.set_options
    else:
        dask_option_setter = dask.config.set
    with dask_option_setter(get=client.get):
        return db.from_sequence(calcs).map(func).compute() 
Author: spencerahill, Project: aospy, Lines: 11, Source: automate.py
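The dask.bag pipeline used above, shown in isolation (the doubling function stands in for func):

import dask.bag as db

results = db.from_sequence([1, 2, 3, 4]).map(lambda x: 2 * x).compute()
print(results)  # [2, 4, 6, 8]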

Example 5: _exec_calcs

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def _exec_calcs(calcs, parallelize=False, client=None, **compute_kwargs):
    """Execute the given calculations.

    Parameters
    ----------
    calcs : Sequence of ``aospy.Calc`` objects
    parallelize : bool, default False
        Whether to submit the calculations in parallel or not
    client : distributed.Client or None
        The distributed Client used if parallelize is set to True; if None
        a distributed LocalCluster is used.
    compute_kwargs : dict of keyword arguments passed to ``Calc.compute``

    Returns
    -------
    A list of the values returned by each Calc object that was executed.
    """
    if parallelize:
        def func(calc):
            """Wrap _compute_or_skip_on_error to require only the calc
            argument"""
            if 'write_to_tar' in compute_kwargs:
                compute_kwargs['write_to_tar'] = False
            return _compute_or_skip_on_error(calc, compute_kwargs)

        if client is None:
            n_workers = _n_workers_for_local_cluster(calcs)
            with distributed.LocalCluster(n_workers=n_workers) as cluster:
                with distributed.Client(cluster) as client:
                    result = _submit_calcs_on_client(calcs, client, func)
        else:
            result = _submit_calcs_on_client(calcs, client, func)
        if compute_kwargs['write_to_tar']:
            _serial_write_to_tar(calcs)
        return result
    else:
        return [_compute_or_skip_on_error(calc, compute_kwargs)
                for calc in calcs] 
Author: spencerahill, Project: aospy, Lines: 40, Source: automate.py

Example 6: cli

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def cli(tmpdir):
    import dask.distributed

    client = dask.distributed.Client(n_workers=1)

    def setup():
        m = fsspec.filesystem("memory")
        with m.open("afile", "wb") as f:
            f.write(b"data")

    client.run(setup)
    try:
        yield client
    finally:
        client.close() 
Author: intake, Project: filesystem_spec, Lines: 17, Source: test_dask.py

Example 7: build_dict

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def build_dict(self, pipeline_name):
        '''Returns a dict we can use for kwargs passed to dask client instantiation.

        Intended to be used like:

        with dask.distributed.Client(**cfg.build_dict()) as client:
            << use client here >>

        '''
        if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
            dask_cfg = {'name': pipeline_name}
        else:
            dask_cfg = {}

        if self.cluster_configuration:
            for k, v in self.cluster_configuration.items():
                dask_cfg[k] = v

        # if address is set, don't add LocalCluster args
        # context: https://github.com/dask/distributed/issues/3313
        if (self.cluster_type == 'local') and ('address' not in dask_cfg):
            # We set threads_per_worker because Dagster is not thread-safe. Even though
            # environments=True by default, there is a clever piece of machinery
            # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes execution
            # multithreaded by default when the number of available cores is greater than 4.
            # See: https://github.com/dagster-io/dagster/issues/2181
            # We may want to try to figure out a way to enforce this on remote Dask clusters against
            # which users run Dagster workloads.
            dask_cfg['threads_per_worker'] = 1

        return dask_cfg 
Author: dagster-io, Project: dagster, Lines: 33, Source: executor.py
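A usage sketch of build_dict, assuming cfg is an instance of the surrounding executor configuration class with cluster_type set to 'local' and no extra cluster_configuration:

import dask.distributed

with dask.distributed.Client(**cfg.build_dict('my_pipeline')) as client:
    future = client.submit(sum, [1, 2, 3])
    print(future.result())  # 6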

Example 8: scale

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def scale(self, n):
        # A shim to maintain backward compatibility
        # https://github.com/dask/distributed/issues/3054
        maximum = dask.config.get("kubernetes.count.max")
        if maximum is not None and maximum < n:
            logger.info(
                "Tried to scale beyond maximum number of workers %d > %d", n, maximum
            )
            n = maximum
        return super().scale(n) 
Author: dask, Project: dask-kubernetes, Lines: 12, Source: core.py

Example 9: test_logs

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
async def test_logs(remote_cluster):
    cluster = remote_cluster
    cluster.scale(2)
    await cluster

    start = time()
    while len(cluster.scheduler_info["workers"]) < 2:
        await asyncio.sleep(0.1)
        assert time() < start + 20

    logs = await cluster.logs()
    assert len(logs) == 3
    for _, log in logs.items():
        assert "distributed.scheduler" in log or "distributed.worker" in log 
Author: dask, Project: dask-kubernetes, Lines: 16, Source: test_async.py

Example 10: test_diagnostics_link_env_variable

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
async def test_diagnostics_link_env_variable(pod_spec, ns):
    pytest.importorskip("bokeh")
    with dask.config.set({"distributed.dashboard.link": "foo-{USER}-{port}"}):
        async with KubeCluster(pod_spec, namespace=ns, asynchronous=True) as cluster:
            port = cluster.scheduler_info["services"]["dashboard"]

            assert (
                "foo-" + getpass.getuser() + "-" + str(port) in cluster.dashboard_link
            ) 
Author: dask, Project: dask-kubernetes, Lines: 11, Source: test_async.py

Example 11: client

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def client():
    from dask.distributed import Client
    with Client(n_workers=4) as dask_client:
        yield dask_client 
Author: pangeo-data, Project: pangeo-stacks, Lines: 6, Source: test_dask_env.py
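A hypothetical test that consumes the fixture above (the test function itself is not part of the original file):

def test_submit_on_client(client):
    future = client.submit(lambda x: x + 1, 41)
    assert future.result() == 42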

Example 12: wait_and_display_progress

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def wait_and_display_progress(fut, progress_bar=None):
    """
    Wait for the future to complete and display the progress bar.
    This method may be used to drive any custom progress bar, which
    displays progress in percent from 0 to 100.

    Parameters
    ----------
    fut: dask future
        future object for the batch of tasks submitted to the distributed
        client.
    progress_bar: callable or None
        callable function or callable object with methods `start()`,
        `__call__(float)` and `finish()`. The methods `start()` and
        `finish()` are optional. For example, this could be a reference
        to an instance of the object `TerminalProgressBar`

    Examples
    --------

    .. code-block::

        client = Client()
        data = da.random.random(size=(100, 100), chunks=(10, 10))
        sm_fut = da.sum(data, axis=0).persist(scheduler=client)

        # Call the progress monitor
        wait_and_display_progress(sm_fut, TerminalProgressBar("Monitoring progress: "))

        sm = sm_fut.compute(scheduler=client)
        client.close()
    """

    # If there is no progress bar, then just return without waiting for the future
    if progress_bar is None:
        return

    if hasattr(progress_bar, "start"):
        progress_bar.start()

    progress_bar(1.0)
    while True:
        done, not_done = wait(fut, return_when='FIRST_COMPLETED')
        n_completed, n_pending = len(done), len(not_done)
        n_total = n_completed + n_pending
        percent_completed = n_completed / n_total * 100.0 if n_total > 0 else 100.0

        # It is guaranteed that 'progress_bar' is called for 100% completion
        progress_bar(percent_completed)

        if not n_pending:
            break
        ttime.sleep(0.5)

    if hasattr(progress_bar, "finish"):
        progress_bar.finish() 
Author: NSLS-II, Project: PyXRF, Lines: 58, Source: map_processing.py
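A minimal progress-bar object that satisfies the interface described in the docstring; this is an illustrative sketch, not the TerminalProgressBar class shipped with PyXRF:

class SimpleProgressBar:

    def start(self):
        print("Processing started ...")

    def __call__(self, percent_completed):
        # Called repeatedly with values between 0 and 100
        print(f"Progress: {percent_completed:.1f}%")

    def finish(self):
        print("Processing finished.")

# wait_and_display_progress(fut, SimpleProgressBar())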

Example 13: _chunk_numpy_array

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def _chunk_numpy_array(data, chunk_size):
    """
    Convert a numpy array into a Dask array with chunks of a given size. The function
    splits the array into chunks along axes 0 and 1. If the array has more than 2 dimensions,
    the remaining dimensions are not chunked. Note that
    `dask_array = da.array(data, chunks=...)` only sets the chunk size and does not split the
    data into chunks, so the array cannot be loaded block by block by workers
    controlled by a distributed scheduler.

    Parameters
    ----------
    data: ndarray(float), 2 or more dimensions
        XRF map of the shape `(ny, nx, ne)`, where `ny` and `nx` represent the image size
        and `ne` is the number of points in spectra
    chunk_size: tuple(int, int) or list(int, int)
         Chunk size for axes 0 and 1: `(chunk_y, chunk_x)`. The function will accept
         chunk size values that are larger than the respective `data` array dimensions.

    Returns
    -------
    data_dask: dask.array
        Dask array with the given chunk size
    """

    chunk_y, chunk_x = chunk_size
    ny, nx = data.shape[0:2]
    chunk_y, chunk_x = min(chunk_y, ny), min(chunk_x, nx)

    def _get_slice(n1, n2):
        data_slice = data[slice(n1 * chunk_y, min(n1 * chunk_y + chunk_y, ny)),
                          slice(n2 * chunk_x, min(n2 * chunk_x + chunk_x, nx))]
        # Wrap the slice into a list with appropriate dimensions
        for _ in range(2, data.ndim):
            data_slice = [data_slice]
        return data_slice

    # Chunk the numpy array and assemble it as a dask array
    data_dask = da.block([
        [
            _get_slice(_1, _2)
            for _2 in range(int(math.ceil(nx / chunk_x)))
        ]
        for _1 in range(int(math.ceil(ny / chunk_y)))
    ])

    return data_dask 
Author: NSLS-II, Project: PyXRF, Lines: 48, Source: map_processing.py
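A quick check of _chunk_numpy_array on synthetic data (the shapes are arbitrary):

import numpy as np

data = np.random.rand(50, 40, 100)              # (ny, nx, ne)
data_dask = _chunk_numpy_array(data, (10, 10))
print(data_dask.chunks[:2])                      # ((10, 10, 10, 10, 10), (10, 10, 10, 10))
print(np.allclose(data_dask.compute(), data))    # True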

Example 14: _fit_xrf_block

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def _fit_xrf_block(data, data_sel_indices,
                   matv, snip_param, use_snip):
    """
    Spectrum fitting for a block of XRF dataset. The function is intended to be
    called using `map_blocks` function for parallel processing using Dask distributed
    package.

    Parameters
    ----------
    data : ndarray
        block of an XRF dataset. Shape=(ny, nx, ne).
    data_sel_indices: tuple
        tuple `(n_start, n_end)` which defines the indices along axis 2 of `data` array
        that are used for fitting. Note that `ne` (in `data`) and `ne_model` (in `matv`)
        are not equal. But `n_end - n_start` MUST be equal to `ne_model`! Indexes
        `n_start .. n_end - 1` will be selected from each pixel.
    matv: ndarray
        Matrix of spectra of the selected elements (emission lines). Shape=(ne_model, n_lines)
    snip_param: dict
        Dictionary of parameters forwarded to 'snip' method for background removal.
        Keys: `e_offset`, `e_linear`, `e_quadratic` (parameters of the energy axis approximation),
        `b_width` (width of the window that defines resolution of the snip algorithm).
    use_snip: bool, optional
        enable/disable background removal using snip algorithm

    Returns
    -------
    data_out: ndarray
        array with fitting results. Shape: `(ny, nx, ne_model + 4)`. For each pixel
        the output data contains: `ne_model` values that represent area under the emission
        line spectra; background area (only in the selected energy range), error (R-factor),
        total count in the selected energy range, total count of the full experimental spectrum.
    """
    spec = data
    spec_sel = spec[:, :, data_sel_indices[0]: data_sel_indices[1]]

    if use_snip:
        bg_sel = np.apply_along_axis(snip_method_numba, 2, spec_sel,
                                     snip_param['e_offset'],
                                     snip_param['e_linear'],
                                     snip_param['e_quadratic'],
                                     width=snip_param['b_width'])

        y = spec_sel - bg_sel
        bg_sum = np.sum(bg_sel, axis=2)

    else:
        y = spec_sel
        bg_sum = np.zeros(shape=data.shape[0:2])

    weights, rfactor, _ = fit_spectrum(y, matv, axis=2, method="nnls")

    total_cnt = np.sum(spec, axis=2)
    sel_cnt = np.sum(spec_sel, axis=2)

    # Stack depth-wise (along axis 2)
    data_out = np.dstack((weights, bg_sum, rfactor, sel_cnt, total_cnt))

    return data_out 
Author: NSLS-II, Project: PyXRF, Lines: 61, Source: map_processing.py

Example 15: createSystemFromIUPAC

# Required import: import dask [as alias]
# Or: from dask import distributed [as alias]
def createSystemFromIUPAC(iupac_name):
    """
    Create an OpenMM system from a molecule specified by its IUPAC name

    Parameters
    ----------
    iupac_name : str
        IUPAC name

    Returns
    -------
    molecule : openeye.OEMol
        OEMol molecule
    system : openmm.System object
        OpenMM system
    positions : [n,3] np.array of floats
        Positions
    topology : openmm.app.Topology object
        Topology
    """
    from perses.utils.data import get_data_filename
    from perses.utils.openeye import extractPositionsFromOEMol
    # Create OEMol
    molecule = iupac_to_oemol(iupac_name)

    # Generate a topology.
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    topology = generateTopologyFromOEMol(molecule)

    # Initialize a forcefield with GAFF.
    # TODO: Fix path for `gaff.xml` since it is not yet distributed with OpenMM
    from simtk.openmm.app import ForceField
    gaff_xml_filename = get_data_filename('data/gaff.xml')
    forcefield = ForceField(gaff_xml_filename)

    # Generate template and parameters.
    from openmoltools.forcefield_generators import generateResidueTemplate
    [template, ffxml] = generateResidueTemplate(molecule)

    # Register the template.
    forcefield.registerResidueTemplate(template)

    # Add the parameters.
    forcefield.loadFile(StringIO(ffxml))

    # Create the system.
    system = forcefield.createSystem(topology, removeCMMotion=False)

    # Extract positions
    positions = extractPositionsFromOEMol(molecule)

    return (molecule, system, positions, topology) 
Author: choderalab, Project: perses, Lines: 54, Source: run_rj_neq.py
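A usage sketch, assuming the OpenEye, openmoltools and perses dependencies referenced inside the function are installed and licensed:

molecule, system, positions, topology = createSystemFromIUPAC('benzene')
print(system.getNumParticles(), topology.getNumAtoms())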


Note: The dask.distributed examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, refer to the license of the corresponding project; please do not repost without permission.