This article collects typical usage examples of LocalCluster from Python's dask.distributed package. If you are unsure what distributed.LocalCluster is for or how to use it, the curated examples below may help. Note that LocalCluster is a class exported by the dask.distributed module, so you may also want to read up on that module as a whole.
Six code examples using distributed.LocalCluster are shown below, drawn from open-source projects and ordered roughly by popularity.
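Before the examples, here is a minimal, self-contained sketch of the pattern they all share: start a LocalCluster, attach a Client, do some work, then shut both down. The parameter values are illustrative only.

from dask.distributed import Client, LocalCluster

# Start an in-process scheduler plus two single-threaded workers on this machine.
cluster = LocalCluster(n_workers=2, threads_per_worker=1)
client = Client(cluster)  # attach a client to the local cluster

print(client.submit(sum, range(10)).result())  # prints 45

client.close()
cluster.close()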
Example 1: LaunchDaskDistributedClient
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):
    if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:
        from multiprocessing.pool import ThreadPool
        try:
            import dask
            from dask.distributed import Client, LocalCluster
        except ImportError:
            raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

        # Configure the pool used by dask's local threaded scheduler.
        dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

        # Initialise the cluster: local if no scheduler address is given,
        # otherwise connect to the existing remote scheduler.
        if scheduler_ip is None:
            cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
            client = Client(cluster)
        else:
            client = Client(scheduler_ip)

        self.dask_client = client
        self.is_dask_scheduler_initialised = True
Example 2: create_cluster
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
def create_cluster(self):
    # Assumes `import logging` and `from dask.distributed import Client, LocalCluster`
    # at module level.
    self.cluster = LocalCluster(
        n_workers=1, processes=False, silence_logs=logging.DEBUG)
    self.client = Client(self.cluster)
Example 3: build_dict
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
def build_dict(self, pipeline_name):
    '''Returns a dict we can use as kwargs for dask client instantiation.

    Intended to be used like:

        with dask.distributed.Client(**cfg.build_dict()) as client:
            <<use client here>>
    '''
    if self.cluster_type in ['yarn', 'pbs', 'moab', 'sge', 'lsf', 'slurm', 'oar', 'kube']:
        dask_cfg = {'name': pipeline_name}
    else:
        dask_cfg = {}

    if self.cluster_configuration:
        for k, v in self.cluster_configuration.items():
            dask_cfg[k] = v

    # If an address is set, don't add LocalCluster args.
    # Context: https://github.com/dask/distributed/issues/3313
    if (self.cluster_type == 'local') and ('address' not in dask_cfg):
        # We set threads_per_worker because Dagster is not thread-safe. Even though
        # processes=True by default, there is a clever piece of machinery
        # (dask.distributed.deploy.local.nprocesses_nthreads) that automagically makes
        # execution multithreaded by default when the number of available cores is
        # greater than 4.
        # See: https://github.com/dagster-io/dagster/issues/2181
        # We may want to try to figure out a way to enforce this on remote Dask clusters
        # against which users run Dagster workloads.
        dask_cfg['threads_per_worker'] = 1

    return dask_cfg
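To make the docstring's usage pattern concrete, here is a runnable sketch of consuming the returned kwargs for a local run; the dict literal stands in for build_dict()'s return value, which is an assumption about a typical local configuration.

import dask.distributed

# What build_dict() would return for cluster_type='local' with no explicit address:
dask_cfg = {'threads_per_worker': 1}
with dask.distributed.Client(**dask_cfg) as client:
    print(client.submit(max, [3, 1, 4]).result())  # prints 4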
Example 4: _prepare_client
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
import logging

from dask.distributed import Client, LocalCluster

LOGGER = logging.getLogger(__name__)


def _prepare_client(client_or_address, num_workers):
    """
    :param client_or_address: one of:
           * None
           * the literal string 'local'
           * a string scheduler address
           * a Client instance
    :return: a tuple: (Client instance, shutdown callback function).
    :raises: ValueError if no valid client input was provided.
    """
    # Credits to Thomas Moerman (arboreto package):
    # https://github.com/tmoerman/arboreto/blob/482ce8598da5385eb0e01a50362cb2b1e6f66a41/arboreto/algo.py#L145-L191
    if client_or_address is None or str(client_or_address).lower() == 'local':
        local_cluster = LocalCluster(n_workers=num_workers,
                                     threads_per_worker=1)
        client = Client(local_cluster)

        def close_client_and_local_cluster(verbose=False):
            if verbose:
                LOGGER.info('shutting down client and local cluster')
            client.close()
            local_cluster.close()

        return client, close_client_and_local_cluster
    elif isinstance(client_or_address, str) and client_or_address.lower() != 'local':
        client = Client(client_or_address)

        def close_client(verbose=False):
            if verbose:
                LOGGER.info('shutting down client')
            client.close()

        return client, close_client
    elif isinstance(client_or_address, Client):

        def close_dummy(verbose=False):
            if verbose:
                LOGGER.info('not shutting down client, client was created externally')
            return None

        return client_or_address, close_dummy
    else:
        raise ValueError("Invalid client specified: {}".format(str(client_or_address)))
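A short usage sketch of the helper above; the submitted lambda is illustrative, everything else uses only the function as defined.

client, shutdown_callback = _prepare_client('local', num_workers=4)
try:
    print(client.submit(lambda v: v * 2, 21).result())  # prints 42
finally:
    shutdown_callback(verbose=True)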
Example 5: run
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
def run(cfg_fname):
    # Assumes module-level imports: from configparser import ConfigParser;
    # import datetime, glob, logging, os, traceback; from functools import partial;
    # plus the project's LOGGER, create_logging_handler, load_tf_names and
    # process helpers.

    # Read configuration file.
    cfg = ConfigParser()
    cfg.read(cfg_fname)

    # Set logging level.
    logging_debug_opt = cfg["params"]["debug"].lower().strip() in {"yes", "true", "y"}
    LOGGER.addHandler(create_logging_handler(logging_debug_opt))
    LOGGER.setLevel(logging.DEBUG)

    # Derive file names.
    #mtx_fnames = list(mapcat(glob.glob, cfg['data']['mtx_fnames'].split(";")))
    mtx_fnames = glob.glob(cfg['data']['mtx_fnames'])
    tfs = load_tf_names(cfg['data']['tfs_fname'])

    # Derive cluster information.
    not_cluster_ip = 'scheduler_ip' not in cfg['params']
    if not_cluster_ip:
        local_cluster = LocalCluster(n_workers=int(cfg['params']['num_cores']),
                                     threads_per_worker=1)
        client = Client(local_cluster)
    else:
        class DummyClient:
            def close(self):
                pass
        local_cluster = DummyClient()
        # Note: here `client` is the scheduler address string itself rather
        # than a Client instance; it is passed through to process() as-is.
        client = cfg['params']['scheduler_ip']

    # Remove fnames that already have a corresponding results file.
    def add_output(fname, out_folder):
        basename = os.path.splitext(os.path.basename(fname))[0]
        return fname, os.path.join(out_folder, "{}.net.csv".format(basename))

    out_folder = cfg['data']['out_folder']
    for in_fname, out_fname in filter(lambda t: not os.path.exists(t[1]),
                                      map(partial(add_output, out_folder=out_folder),
                                          mtx_fnames)):
        LOGGER.info("Running GRNboost for {}.".format(in_fname))
        try:
            process(in_fname, tfs, out_fname, client)
        except ValueError as e:
            LOGGER.error("Unable to process {} because of \"{}\". Stacktrace:".format(in_fname, str(e)))
            LOGGER.error(traceback.format_exc())

    if not_cluster_ip:
        client.close()
        local_cluster.close()

    print("{} - Done.".format(datetime.datetime.now()))
Example 6: main
# Required import: from dask import distributed
# or: from dask.distributed import LocalCluster
import argparse
import sys
import time

import cupy
import dask.array as da
from dask.distributed import Client, LocalCluster
from dask_cuda import LocalCUDACluster

# get_scheduler_info(), create_data() and run() are helpers defined elsewhere
# in the original script.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--xdim', type=int, default=500000)
    parser.add_argument('--ydim', type=int, default=500000)
    parser.add_argument('--x_chunk_size', type=int, default=10000)
    parser.add_argument('--y_chunk_size', type=int, default=10000)
    parser.add_argument('--use_gpus_only', action="store_true")
    parser.add_argument('--n_gpus', type=int, default=1)
    parser.add_argument('--use_cpus_only', action="store_true")
    parser.add_argument('--n_cpu_sockets', type=int, default=1)
    parser.add_argument('--n_cpu_cores_per_socket', type=int, default=1)
    parser.add_argument('--use_distributed_dask', action="store_true")
    args = parser.parse_args()

    sched_ip, sched_uri = get_scheduler_info()

    if args.use_distributed_dask:
        print('Using Distributed Dask')
        client = Client(sched_uri)
    elif args.use_gpus_only:
        print('Using GPUs and Local Dask')
        cluster = LocalCUDACluster(ip=sched_ip, n_workers=args.n_gpus)
        client = Client(cluster)
    elif args.use_cpus_only:
        print('Using CPUs and Local Dask')
        # Note: recent dask versions use host= instead of the deprecated ip=.
        cluster = LocalCluster(ip=sched_ip, n_workers=args.n_cpu_sockets,
                               threads_per_worker=args.n_cpu_cores_per_socket)
        client = Client(cluster)
    else:
        print("Exiting...")
        sys.exit(-1)

    start = time.time()
    if args.use_gpus_only:
        print('Allocating and initializing arrays using GPU memory with CuPy')
        rs = da.random.RandomState(RandomState=cupy.random.RandomState)
    elif args.use_cpus_only:
        print('Allocating and initializing arrays using CPU memory')
        rs = da.random.RandomState()
    else:
        # Fallback for --use_distributed_dask: without this branch `rs` would
        # be undefined below (a bug in the original snippet).
        rs = da.random.RandomState()

    x = create_data(rs, args.xdim, args.ydim, args.x_chunk_size,
                    args.y_chunk_size)
    print('Array size: {:.2f} TB. Computing parallel sum . . .'.format(
        x.nbytes / 1e12))
    run(x)
    end = time.time()
    delta = end - start

    print('Processing complete.')
    print('Wall time (create data + computation): {:10.8f} seconds'.format(delta))

    del x
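Illustrative invocations, assuming the script above is saved as dask_benchmark.py (the file name is an assumption):

# python dask_benchmark.py --use_cpus_only --n_cpu_sockets 2 --n_cpu_cores_per_socket 8
# python dask_benchmark.py --use_gpus_only --n_gpus 4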