This article collects typical usage examples of the torch.multiprocessing module in Python. If you are wondering what exactly torch.multiprocessing does, how to use it, or what code that uses it looks like in practice, the curated examples below may help. You can also explore further usage examples of torch, the package this module belongs to.
The following 15 code examples of torch.multiprocessing are shown, sorted by popularity by default.
Example 1: main
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def main():
    if cfg.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if cfg.dist_url == "env://" and cfg.world_size == -1:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    cfg.distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if cfg.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        cfg.world_size = ngpus_per_node * cfg.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, cfg))
    else:
        # Simply call main_worker function
        main_worker(cfg.gpu, ngpus_per_node, cfg)

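torch.multiprocessing.spawn passes the index of each spawned process (0 to nprocs-1) as the first positional argument, followed by the contents of args. A minimal single-node sketch of a compatible main_worker is shown below; its body (using the process index as both GPU id and distributed rank, and the nccl backend) is an assumption for illustration, not the worker from the original project.

import torch
import torch.distributed as dist

def main_worker(gpu, ngpus_per_node, cfg):
    # Spawned once per GPU; `gpu` is the process index supplied by mp.spawn.
    # On a single node the process index can double as the distributed rank.
    dist.init_process_group(backend='nccl', init_method=cfg.dist_url,
                            world_size=cfg.world_size, rank=gpu)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in DistributedDataParallel, and train ...
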
Example 2: set_cuda_id
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def set_cuda_id(spec):
    '''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
    # Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
    # see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
    for agent_spec in spec['agent']:
        if 'net' not in agent_spec or not agent_spec['net'].get('gpu'):
            return
    meta_spec = spec['meta']
    trial_idx = meta_spec['trial'] or 0
    session_idx = meta_spec['session'] or 0
    if meta_spec['distributed'] == 'shared':  # shared hogwild uses only global networks, offset them to idx 0
        session_idx = 0
    job_idx = trial_idx * meta_spec['max_session'] + session_idx
    job_idx += meta_spec['cuda_offset']
    device_count = torch.cuda.device_count()
    cuda_id = None if not device_count else job_idx % device_count

    for agent_spec in spec['agent']:
        agent_spec['net']['cuda_id'] = cuda_id

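To make the round-robin mapping concrete: with trial 1, session 2, max_session 4 and cuda_offset 0, job_idx is 1 * 4 + 2 = 6, so on a two-GPU machine cuda_id becomes 6 % 2 = 0. The snippet below reproduces that arithmetic with a made-up spec; the field values are assumptions chosen only for illustration.

# Hypothetical spec using the same fields that set_cuda_id above reads.
spec = {
    'agent': [{'net': {'gpu': True}}],
    'meta': {'trial': 1, 'session': 2, 'distributed': False,
             'max_session': 4, 'cuda_offset': 0},
}
set_cuda_id(spec)
# On a machine with 2 visible CUDA devices this prints 0 (job_idx 6 % 2);
# with no CUDA device it prints None.
print(spec['agent'][0]['net']['cuda_id'])
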
Example 3: _dataset_from_chunk
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def _dataset_from_chunk(cls, chunk, processor):
    """
    Creating a dataset for a chunk (= subset) of dicts. In multiprocessing:
      * we read in all dicts from a file
      * split all dicts into chunks
      * feed *one chunk* to *one process*
        => the *one chunk* gets converted to *one dataset* (that's what we do here)
      * all datasets get collected and concatenated

    :param chunk: Instead of only having a list of dicts here we also supply an index (ascending int) for each.
        => [(0, dict), (1, dict) ...]
    :type chunk: list of tuples
    :param processor: FARM Processor (e.g. TextClassificationProcessor)
    :return: PyTorch Dataset
    """
    dicts = [d[1] for d in chunk]
    indices = [x[0] for x in chunk]
    dataset = processor.dataset_from_dicts(dicts=dicts, indices=indices)
    return dataset

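The chunk-per-process workflow described in the docstring is typically driven by a process pool. The sketch below is an assumed driver, not the original FARM code: the chunk size, pool size, and the datasets_from_dicts name are hypothetical, and the final concatenation presumes that dataset_from_dicts returns a plain PyTorch Dataset.

from functools import partial
import torch.multiprocessing as mp
from torch.utils.data import ConcatDataset

def datasets_from_dicts(dicts, processor, num_processes=4, chunk_size=100):
    # Pair every dict with an ascending index, then split into chunks so that
    # each chunk can be converted to a dataset by a separate worker process.
    indexed = list(enumerate(dicts))
    chunks = [indexed[i:i + chunk_size] for i in range(0, len(indexed), chunk_size)]
    with mp.Pool(processes=num_processes) as pool:
        datasets = pool.map(partial(_dataset_from_chunk, None, processor=processor), chunks)
    return ConcatDataset(datasets)
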
Example 4: set_cuda_id
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def set_cuda_id(spec):
    '''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
    # Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
    # see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
    for agent_spec in spec['agent']:
        if not agent_spec['net'].get('gpu'):
            return
    meta_spec = spec['meta']
    trial_idx = meta_spec['trial'] or 0
    session_idx = meta_spec['session'] or 0
    if meta_spec['distributed'] == 'shared':  # shared hogwild uses only global networks, offset them to idx 0
        session_idx = 0
    job_idx = trial_idx * meta_spec['max_session'] + session_idx
    job_idx += meta_spec['cuda_offset']
    device_count = torch.cuda.device_count()
    cuda_id = job_idx % device_count if torch.cuda.is_available() else None

    for agent_spec in spec['agent']:
        agent_spec['net']['cuda_id'] = cuda_id

Example 5: __init__
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.Queue()
        self.data_queue = multiprocessing.Queue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()

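Each worker process runs a loop that pulls index batches from index_queue, assembles a batch with collate_fn, and pushes the result onto data_queue. The sketch below is an assumption that matches the args tuple above; it is a simplified stand-in for, not a copy of, the actual PyTorch _worker_loop.

def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    # Runs inside each worker process until it receives the None sentinel.
    while True:
        task = index_queue.get()
        if task is None:
            break
        idx, batch_indices = task
        try:
            batch = collate_fn([dataset[i] for i in batch_indices])
        except Exception as e:
            data_queue.put((idx, e))
        else:
            data_queue.put((idx, batch))
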
Example 6: __init__
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
             num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False,
             timeout=0, worker_init_fn=None):
    self.dataset = dataset
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.collate_fn = collate_fn
    self.pin_memory = pin_memory
    self.drop_last = drop_last
    self.timeout = timeout
    self.worker_init_fn = worker_init_fn

    if timeout < 0:
        raise ValueError('timeout option should be non-negative')

    if batch_sampler is not None:
        if batch_size > 1 or shuffle or sampler is not None or drop_last:
            raise ValueError('batch_sampler is mutually exclusive with '
                             'batch_size, shuffle, sampler, and drop_last')

    if sampler is not None and shuffle:
        raise ValueError('sampler is mutually exclusive with shuffle')

    if self.num_workers < 0:
        raise ValueError('num_workers cannot be negative; '
                         'use num_workers=0 to disable multiprocessing.')

    if batch_sampler is None:
        if sampler is None:
            if shuffle:
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)

    self.sampler = sampler
    self.batch_sampler = batch_sampler

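A minimal usage sketch for the loader above, assuming the class also defines __iter__ as in torch.utils.data.DataLoader; the TensorDataset here is only illustrative data.

import torch
from torch.utils.data import TensorDataset

data = TensorDataset(torch.randn(100, 3), torch.randint(0, 10, (100,)))
loader = DataLoader(data, batch_size=8, shuffle=True, num_workers=2, pin_memory=True)
for inputs, targets in loader:
    pass  # each iteration yields one collated batch of 8 samples (the last may be smaller)
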
Example 7: thread_wrapped_func
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def thread_wrapped_func(func):
    """Wrapper for functions run via torch.multiprocessing.Process.

    With this wrapper we can use OMP threads in subprocesses;
    otherwise OMP_NUM_THREADS=1 is mandatory.

    How to use:
    @thread_wrapped_func
    def func_to_wrap(args ...):
    """
    @wraps(func)
    def decorated_function(*args, **kwargs):
        queue = mp.Queue()

        def _queue_result():
            exception, trace, res = None, None, None
            try:
                res = func(*args, **kwargs)
            except Exception as e:
                exception = e
                trace = traceback.format_exc()
            queue.put((res, exception, trace))

        start_new_thread(_queue_result, ())
        result, exception, trace = queue.get()
        if exception is None:
            return result
        else:
            assert isinstance(exception, Exception)
            raise exception.__class__(trace)
    return decorated_function

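A hypothetical usage, following the docstring: decorate the target before handing it to torch.multiprocessing.Process so that OMP threads stay usable inside the child processes. The function name and arguments below are made up for illustration.

import torch.multiprocessing as mp

@thread_wrapped_func
def train_shard(rank, num_epochs):
    # OMP-threaded work (e.g. CPU-bound tensor ops) can run here.
    print(f'worker {rank} training for {num_epochs} epochs')

if __name__ == '__main__':
    procs = [mp.Process(target=train_shard, args=(r, 10)) for r in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
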
Example 8: __init__
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn,
                      np.random.randint(0, 4294967296, dtype='uint32')))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()

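When pin_memory is set, a background thread in the main process copies each batch into page-locked memory so later host-to-GPU transfers can be asynchronous. The sketch below is a simplified assumption matching the args tuple above, not the exact PyTorch _pin_memory_loop.

import torch

def _pin_memory_loop(in_queue, out_queue, done_event):
    # Runs in a daemon thread of the main process.
    while not done_event.is_set():
        idx, batch = in_queue.get()
        if isinstance(batch, torch.Tensor):
            batch = batch.pin_memory()
        out_queue.put((idx, batch))
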
Example 9: main
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def main():
    args = parser.parse_args()

    if not os.path.exists(os.path.join(args.save_folder, args.dataset, args.network)):
        os.makedirs(os.path.join(args.save_folder, args.dataset, args.network))

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    os.environ['WORLD_SIZE'] = '2'

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)

Example 10: run_in_process_group
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3 # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results

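run_in_process_group delegates the per-rank work to init_and_run_process, which is not shown here. The sketch below is one plausible shape for it, assuming file-based rendezvous via the filename argument and the gloo backend; the body is an assumption, not the original helper.

import torch.distributed as dist

def init_and_run_process(rank, world_size, filename, fn, inputs, q, wait_event):
    # Join the process group, run the workload, publish the result, then wait
    # until the parent has drained the queue (queued tensors must stay alive).
    dist.init_process_group(backend='gloo', init_method=f'file://{filename}',
                            rank=rank, world_size=world_size)
    q.put(fn(inputs))
    wait_event.wait()
    dist.destroy_process_group()
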
Example 11: main
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def main():
    _A = parser.parse_args()

    random.seed(_A.seed)
    torch.manual_seed(_A.seed)
    cudnn.deterministic = True

    _A.world_size = torch.cuda.device_count()
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function
    mp.spawn(main_worker, nprocs=_A.world_size, args=(_A.world_size, _A))

Example 12: __init__
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()

Example 13: main
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if not args.cpu:
            cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)

Example 14: main
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def main():
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)

Example 15: __init__
# Required import: import torch [as an alias]
# Or: from torch import multiprocessing [as an alias]
def __init__(self, threadsafe=False, shared=None):
    self._threadsafe = threadsafe
    if self._threadsafe and shared is None:
        # Threadsafe metrics tracking works by keeping a queue that workers can
        # push updates to. The main worker works through the queue at report
        # time. We could add some buffering to improve performance, but we
        # are deprioritizing hogwild performance at this time.
        self._buffer = None
        self._queue = multiprocessing.SimpleQueue()
        self._worker = False
        self._data = {}
    elif shared and 'queue' in shared:
        # This is a clone, in threadsafe mode
        self._buffer = {}
        self._queue = shared['queue']
        self._worker = True
        self._data = None
    elif shared and 'data' in shared:
        # This is a clone, in non-threadsafe mode
        self._buffer = None
        self._queue = None
        self._worker = False
        self._data = shared['data']
    else:
        # The original in non-threadsafe mode
        self._buffer = None
        self._queue = None
        self._worker = False
        self._data = {}

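A hypothetical companion method for the threadsafe mode above: worker clones put their buffered updates on the shared SimpleQueue, and the original instance drains it when a report is requested. The method name and the additive merge below are assumptions for illustration only.

def report(self):
    # Main-process side of threadsafe mode: fold queued worker updates
    # into self._data, then return a snapshot of the metrics.
    if self._threadsafe and not self._worker:
        while not self._queue.empty():
            key, value = self._queue.get()
            self._data[key] = self._data.get(key, 0) + value
    return dict(self._data)
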