本文整理汇总了 Python 中 dask.distributed.Client 类的典型用法代码示例。如果您正苦于以下问题：Python distributed.Client 的具体用法？Python distributed.Client 怎么用？Python distributed.Client 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 dask.distributed 的用法示例。
在下文中一共展示了 distributed.Client 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: LaunchDaskDistributedClient
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):
    """Create a Dask distributed client and remember it on the instance.

    The method is a no-op unless ``self.parallel`` is truthy,
    ``self.parallel_model`` equals ``"dask"`` and
    ``self.is_dask_scheduler_initialised`` is ``False``. Otherwise the
    client is stored in ``self.dask_client`` and
    ``self.is_dask_scheduler_initialised`` is set to ``True``.

    Parameters
    ----------
    scheduler_ip : optional
        Value passed straight to ``Client``. When ``None`` a thread-based
        ``LocalCluster`` (``processes=False``) with
        ``self.no_of_cpu_cores`` workers is started instead.
    scheduler_port : optional
        Accepted for interface compatibility; this method does not use it.

    Raises
    ------
    ImportError
        If ``dask`` or ``dask.distributed`` cannot be imported.
    """
    if not (self.parallel and self.parallel_model == "dask"
            and self.is_dask_scheduler_initialised is False):
        return

    from multiprocessing.pool import ThreadPool
    try:
        import dask
        from dask.distributed import Client, LocalCluster
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "dask is not installed. Install it using 'pip install dask[complete]'"
        ) from err

    dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

    # INITIALISE CLUSTER
    if scheduler_ip is None:
        cluster = LocalCluster(
            n_workers=self.no_of_cpu_cores,
            processes=False,
            threads_per_worker=None,
        )
        client = Client(cluster)
    else:
        client = Client(scheduler_ip)

    self.dask_client = client
    self.is_dask_scheduler_initialised = True
示例2: dask_client_create
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def dask_client_create(**kwargs):
    """
    Build a Dask client in one uniform way for the whole program.

    Parameters
    ----------
    kwargs: dict, optional
        Keyword arguments forwarded to the Dask client constructor. They
        take precedence over the defaults ``processes=True`` and
        ``silence_logs=logging.ERROR``.

    Returns
    -------
    client: dask.distributed.Client
        The newly created Dask client.
    """
    client_options = {"processes": True, "silence_logs": logging.ERROR, **kwargs}
    client = Client(**client_options)

    # Dask configuration is adjusted only after the client has been created.
    dask.config.set(shuffle="disk")
    dask.config.set({"temporary_directory": os.path.expanduser("~/.dask")})

    return client
示例3: activate_client
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def activate_client(self,
                    LSF = True,
                    num_processes = 2,
                    adapt = False):
    """Attach a Dask client backed by an LSF cluster, or disable parallelism.

    Parameters
    ----------
    LSF : bool, default True
        When truthy, an ``LSFCluster`` is created and a
        ``distributed.Client`` connected to it is stored in ``self.client``.
        When falsy, ``self.client`` is set to ``None``, ``self._adapt`` to
        ``False`` and ``self.num_processes`` to ``0``.
    num_processes : int, default 2
        Value passed to ``cluster.scale``, or used as the ``maximum`` for
        ``cluster.adapt``.
    adapt : bool, default False
        When truthy the cluster is adapted between 2 and ``num_processes``
        instead of being scaled to a fixed size.
    """
    if not LSF:
        self.client = None
        self._adapt = False
        self.num_processes = 0
        return

    from dask_jobqueue import LSFCluster
    cluster = LSFCluster()
    self._adapt = adapt
    self.num_processes = num_processes

    if self._adapt:
        # Keep the logged lower bound in sync with what is passed to adapt().
        minimum = 2
        _logger.debug(f"adapting cluster from {minimum} to {self.num_processes} processes")
        cluster.adapt(minimum=minimum, maximum=self.num_processes, interval="1s")
    else:
        _logger.debug(f"scaling cluster to {self.num_processes} processes")
        cluster.scale(self.num_processes)

    _logger.debug("scheduling cluster with client")
    self.client = distributed.Client(cluster)
示例4: setup_dask
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def setup_dask(scheduler, retries=-1):
    """Return a Dask client, retrying the connection to a remote scheduler.

    Parameters
    ----------
    scheduler
        Scheduler address passed to ``Client``. ``None`` or the literal
        string ``"{scheduler}"`` (presumably an unsubstituted template
        placeholder -- verify against callers) starts a local cluster.
    retries : int, default -1
        Maximum number of connection attempts. Values that are never reached
        by the attempt counter (such as the default ``-1`` or ``0``) retry
        forever.

    Returns
    -------
    The connected client.

    Raises
    ------
    Exception
        If every one of the ``retries`` attempts failed; the last connection
        error is attached as the cause.
    """
    if scheduler is None or scheduler == "{scheduler}":
        print("Setting up local cluster...")
        return Client()

    attempt = 0
    while True:
        attempt += 1
        try:
            return Client(scheduler, timeout=60)
        except Exception as e:  # pylint: disable=broad-except
            print(e)
            if attempt == retries:
                # Give up only after the last allowed attempt was really made.
                raise Exception("Failed to connect to Dask client") from e
        # Wait before the next attempt (skipped once we have given up).
        time.sleep(15)
示例5: main
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def main():
    """Read the tab-separated file ``day_1`` and write it as parquet.

    The frame has one ``click`` column, 13 ``numeric_*`` columns and 26
    ``category_*`` columns; the category columns are written with "bytes"
    object encoding and a fixed text width of 8.
    """
    client = Client()  # noqa

    numeric_names = ["numeric_%d" % idx for idx in range(13)]
    category_names = ["category_%d" % idx for idx in range(26)]
    all_names = ["click", *numeric_names, *category_names]

    frame = dd.read_csv("day_1", sep="\t", names=all_names, header=None)

    frame.to_parquet(
        "day-1-bytes.parquet",
        object_encoding=dict.fromkeys(category_names, "bytes"),
        fixed_text=dict.fromkeys(category_names, 8),
        compression="SNAPPY",
    )
示例6: run_search
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def run_search():
    """Start a default Dask client, print it, and run ``run_games(-.8)``."""
    from dask.distributed import Client, LocalCluster
    import joblib
    import hypertunity as ht

    # Disabled alternative: Client(scheduler_file='scheduler.json')
    dask_client = Client()
    print(dask_client)

    # Single-value search domain; only the disabled joblib code below reads it.
    cost_rates = {-.8}
    domain = ht.Domain({"cost_rate": cost_rates})

    # Disabled: dispatch run_games over domain samples via joblib's dask backend.
    # with joblib.parallel_backend('dask'):
    #     with joblib.Parallel() as parallel:
    #         print("Doing the work ... ")
    #         results = parallel(joblib.delayed(run_games)(*domain.sample().as_namedtuple()) for s in range(1))
    #
    # print(results)

    run_games(-.8)
示例7: create_cluster
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def create_cluster(self):
    """Create a one-worker, thread-based LocalCluster and a Client for it.

    The cluster is stored in ``self.cluster`` and the client in
    ``self.client``.
    """
    cluster_options = {
        "n_workers": 1,
        "processes": False,
        "silence_logs": logging.DEBUG,
    }
    self.cluster = LocalCluster(**cluster_options)
    self.client = Client(self.cluster)
示例8: test_dask_multiprocessing
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def test_dask_multiprocessing(tmpdir):
    """
    Test that dask multiprocessing works on Python 3.

    A small script that opens and closes a dask ``Client`` is written to
    ``tmpdir`` and executed twice in the kernel through ``runfile``; the test
    then checks that the variable ``x`` set by the script exists.
    """
    # Command to start the kernel
    cmd = "from spyder_kernels.console import start; start.main()"

    with setup_kernel(cmd) as client:
        # Remove all variables
        client.execute("%reset -f")
        client.get_shell_msg(block=True, timeout=TIMEOUT)

        # Write multiprocessing code to a file.
        # The body of the ``if`` must be indented, otherwise the script is a
        # syntax error and ``x`` is never defined.
        code = """
from dask.distributed import Client
if __name__=='__main__':
    client = Client()
    client.close()
    x = 'hello'
"""
        p = tmpdir.join("mp-test.py")
        p.write(code)

        # Run code two times to verify that the second run doesn't break
        run_command = "runfile(r'{}')".format(to_text_string(p))
        for _ in range(2):
            client.execute(run_command)
            client.get_shell_msg(block=True, timeout=TIMEOUT)

        # Verify that the `x` variable is defined
        client.inspect('x')
        msg = client.get_shell_msg(block=True, timeout=TIMEOUT)
        content = msg['content']
        assert content['found']
示例9: test_cross_val_score_client
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def test_cross_val_score_client(trend):
    "Test the deprecated dask Client interface"
    coords, data = trend[:2]
    model = Trend(degree=1)
    nsplits = 5
    cross_validator = ShuffleSplit(n_splits=nsplits, random_state=0)
    client = Client(processes=False)
    try:
        futures = cross_val_score(
            model, coords, data, cv=cross_validator, client=client
        )
        scores = [future.result() for future in futures]
    finally:
        # Close the client even when scoring or a future fails, so a failing
        # test does not leave the client open.
        client.close()
    assert len(scores) == nsplits
    npt.assert_allclose(scores, 1)
示例10: setup
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def setup(self, *args, **kwargs):
    """Set up the time and peak-memory benchmarks of `compute_hindcast` and
    `bootstrap_hindcast`. The tests are the same as in `Compute`, but they
    run on chunked data with dask.distributed.Client.

    Only the keyword arguments are forwarded to the parent ``setup``."""
    requires_dask()
    # magic taken from
    # https://github.com/pydata/xarray/blob/stable/asv_bench/benchmarks/rolling.py
    super().setup(**kwargs)
    dask_client = Client()
    self.client = dask_client
示例11: setup
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def setup(self, *args, **kwargs):
    """Set up the time and peak-memory benchmarks of `compute_perfect_model`
    and `bootstrap_perfect_model`. The tests are the same as in `Compute`,
    but they run on chunked data with dask.distributed.Client.

    Only the keyword arguments are forwarded to the parent ``setup``."""
    requires_dask()
    # magic taken from
    # https://github.com/pydata/xarray/blob/stable/asv_bench/benchmarks/rolling.py
    super().setup(**kwargs)
    dask_client = Client()
    self.client = dask_client
示例12: test_run_dask
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def test_run_dask(fix_task_env):
    """Run the three example tasks through a dask Client and check results.

    ``fix_task_env`` supplies the input, gathered and post-processing task
    callables. They are wrapped with ``dask.delayed``, computed through a
    thread-based client (``processes=False``) and compared with values
    computed directly with numpy.
    """
    import numpy as np
    from dask import delayed as dl
    from dask.distributed import Client

    dc = Client(processes=False)
    try:
        input_task_example, gathered_task_example, post_processing_task_example = (
            fix_task_env
        )
        parts = {"a": [0.0, 1.0, 2.0], "b": [-3.0, 10.0, 2.0], "c": [20.0]}
        numpoints = 20
        prefactor = 0.1

        input_delayed = dl(input_task_example)(parts)
        gathered_delayed = dl(gathered_task_example, nout=1)(
            [input_delayed], [numpoints]
        )[0]
        post_proc_delayed = dl(post_processing_task_example)(
            input_delayed, gathered_delayed, prefactor
        )

        input_future = dc.compute(input_delayed)
        gathered_future = dc.compute(gathered_delayed)
        post_proc_future = dc.compute(post_proc_delayed)

        input_data = input_future.result()
        gathered_data = gathered_future.result()
        post_proc_data = post_proc_future.result()
    finally:
        # Always release the client, even if a task fails.
        dc.close()

    assert input_data == parts

    gather_results = {part: np.linspace(0.0, 1.0, numpoints) for part in parts}
    for part in gather_results:
        assert np.all(gathered_data[part] == gather_results[part])

    # Accumulate in the same order as before so the exact float comparison
    # below is unaffected.
    post_proc_results = 0.0
    for part in parts:
        post_proc_results += (
            prefactor * np.sum(input_data[part]) * np.sum(gather_results[part])
        )
    assert post_proc_data == post_proc_results
示例13: apply
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def apply(
    self,
    df: dd.DataFrame,
    scheduler: Scheduler = "processes",
    fault_tolerant: bool = False,
) -> np.ndarray:
    """Apply the LFs to every row of a Dask DataFrame and return the labels.

    Parameters
    ----------
    df
        Dask DataFrame whose rows are the data points to label
    scheduler
        Passed to ``compute`` as the Dask scheduling configuration: either
        a string option or a ``Client``. For more information, see
        https://docs.dask.org/en/stable/scheduling.html#
    fault_tolerant
        Passed to ``_FunctionCaller``; per the original documentation,
        ``-1`` is output when LF execution fails

    Returns
    -------
    np.ndarray
        Matrix of labels emitted by LFs
    """
    caller = _FunctionCaller(fault_tolerant)
    label_row = partial(apply_lfs_to_data_point, lfs=self._lfs, f_caller=caller)

    def label_partition(p_df):
        # Label one partition row by row.
        return p_df.apply(label_row, axis=1)

    lazy_labels = df.map_partitions(label_partition)
    computed_labels = lazy_labels.compute(scheduler=scheduler)
    return self._numpy_from_row_data(rows_to_triplets(computed_labels))
示例14: main
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def main():
    """Start logging locally and on three Dask workers, then run and print
    the result of ``main_computation``."""
    # Logging for the main process comes first.
    _start_logging()

    # Three single-threaded workers on the local machine.
    worker_options = {"n_workers": 3, "threads_per_worker": 1}
    client = Client(**worker_options)

    # Every worker runs the same logging setup function.
    client.run(_start_logging)

    # The Dask computation itself.
    result = main_computation()
    print("Result:", result)
示例15: test_future
# 需要导入模块: from dask import distributed [as 别名]
# 或者: from dask.distributed import Client [as 别名]
def test_future(self):
    """compute_with_trace() can handle Futures."""
    dask_client = Client(processes=False)
    self.addCleanup(dask_client.shutdown)

    # Persist the bag first, then map over the persisted bag.
    (persisted,) = dask.persist(from_sequence([1, 2, 3]))
    scaled = persisted.map(lambda value: value * 5)

    computed = dask.compute(scaled)
    expected = ([5, 10, 15],)
    self.assertEqual(computed, expected)
    self.assertEqual(computed, compute_with_trace(scaled))