本文整理汇总了Python中torch.multiprocessing.spawn方法的典型用法代码示例。如果您正苦于以下问题：Python multiprocessing.spawn方法的具体用法？Python multiprocessing.spawn怎么用？Python multiprocessing.spawn使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.multiprocessing的用法示例。
在下文中一共展示了multiprocessing.spawn方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def main():
    """Parse the CLI arguments, read the SLURM job layout and launch workers.

    ``SLURM_PROCID``, ``SLURM_NPROCS`` and ``SLURM_JOBID`` are read from the
    environment (a missing variable raises ``KeyError``). The derived rank,
    world size and rendezvous URL are stored on ``args``, which is then passed
    to ``main_worker`` through ``mp.spawn`` with one process per CUDA device
    reported by ``torch.cuda.device_count()``.
    """
    args = parser.parse_args()

    seed = args.seed
    if seed is not None:
        # Seed Python's and torch's RNGs and force deterministic cuDNN.
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    env = os.environ
    args.local_rank = int(env["SLURM_PROCID"])
    args.world_size = int(env["SLURM_NPROCS"])
    gpu_count = torch.cuda.device_count()

    # The rendezvous file name is the resolved dist_file path suffixed with
    # the SLURM job id.
    slurm_job_id = env["SLURM_JOBID"]
    resolved_dist_file = os.path.realpath(args.dist_file)
    args.dist_url = "file://{}.{}".format(resolved_dist_file, slurm_job_id)

    mp.spawn(main_worker, nprocs=gpu_count, args=(gpu_count, args))
示例2: setup
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def setup(rank, device_ids, args):
    """Join the "gloo" process group as ``rank`` and run ``train``.

    :param rank: Rank of this process; also the index into ``device_ids``
    :param device_ids: Sequence of devices; its length is the world size
    :param args: 16-item sequence of training settings, unpacked below
    """
    # Rendezvous address/port used by init_process_group.
    os.environ.update({'MASTER_ADDR': 'localhost', 'MASTER_PORT': '12355'})

    # initialize the process group
    dist.init_process_group(backend="gloo", rank=rank, world_size=len(device_ids))

    (
        train_file,
        test_file,
        batch_size,
        epochs,
        gpu_mode,
        num_workers,
        retrain_model,
        retrain_model_path,
        gru_layers,
        hidden_size,
        learning_rate,
        weight_decay,
        model_dir,
        stats_dir,
        total_callers,
        train_mode,
    ) = args

    # Fixed seed so that models created in separate processes start from the
    # same random weights and biases.
    # See https://github.com/pytorch/pytorch/issues/2517
    torch.manual_seed(42)

    device = device_ids[rank]
    # NOTE: train() takes train_mode before total_callers, the reverse of
    # their order inside ``args``.
    train(
        train_file, test_file, batch_size, epochs, gpu_mode, num_workers,
        retrain_model, retrain_model_path, gru_layers, hidden_size,
        learning_rate, weight_decay, model_dir, stats_dir, train_mode,
        total_callers, rank, device,
    )
    cleanup()
示例3: predict_gpu
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def predict_gpu(file_chunks, output_filepath, model_path, batch_size, total_callers, devices, num_workers):
    """
    Spawn ``total_callers`` processes running ``setup`` and wait for them.

    This function returns nothing itself.

    :param file_chunks: Path to chunked files
    :param output_filepath: Path to output directory
    :param model_path: Path to a trained model
    :param batch_size: Batch size used for prediction
    :param total_callers: Number of callers to spawn
    :param devices: List of available CUDA devices
    :param num_workers: Number of workers to be used by the dataloader
    """
    # Settings bundled into one tuple and forwarded to every spawned process.
    prediction_args = (output_filepath, model_path, batch_size, num_workers)

    # mp.spawn prepends the process index to these positional arguments.
    spawn_args = (total_callers, prediction_args, file_chunks, devices)
    mp.spawn(setup, args=spawn_args, nprocs=total_callers, join=True)
示例4: _inference
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def _inference(self, cand):
    """Run ``bn_statistic`` and then ``fitness`` for ``cand`` in spawned processes.

    Each stage gets a fresh pipe; the child end is handed to the workers and
    the value received on the parent end is taken as the stage's result. The
    first result (``salt``) is forwarded to the second stage and names a
    ``<salt>.pth`` file under ``cfg.OUTPUT_DIR`` that is deleted afterwards if
    it exists. Returns the value received from the second stage.
    """
    # bn_statistic
    receiver, sender = mp.Pipe()
    worker_opts = {"local_rank": 0, "distributed": False}
    n_procs = self.ngpus_per_node
    mp.spawn(bn_statistic, nprocs=n_procs,
             args=(n_procs, cfg, worker_opts, cand, sender))
    salt = receiver.recv()

    # fitness
    receiver, sender = mp.Pipe()
    worker_opts = {"local_rank": 0, "distributed": False}
    n_procs = self.ngpus_per_node
    mp.spawn(fitness, nprocs=n_procs,
             args=(n_procs, cfg, worker_opts, cand, salt, sender))

    # Remove the intermediate checkpoint named after the salt, if present.
    if os.path.isfile(os.path.join(cfg.OUTPUT_DIR, salt + ".pth")):
        os.remove(os.path.join(cfg.OUTPUT_DIR, salt + ".pth"))
    return receiver.recv()
示例5: main
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def main():
    """Finalize the distributed settings on ``cfg`` and start ``main_worker``.

    With ``cfg.multiprocessing_distributed`` set, one ``main_worker`` process
    is spawned per CUDA device and ``cfg.world_size`` is scaled by the device
    count. Otherwise ``main_worker`` is called directly in this process with
    ``cfg.gpu``.
    """
    if cfg.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    world_size_from_env = cfg.dist_url == "env://" and cfg.world_size == -1
    if world_size_from_env:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    cfg.distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed

    gpu_count = torch.cuda.device_count()

    if not cfg.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(cfg.gpu, gpu_count, cfg)
        return

    # Since there are gpu_count processes per node, the total world_size
    # needs to be adjusted accordingly.
    cfg.world_size = gpu_count * cfg.world_size
    # Use torch.multiprocessing.spawn to launch the distributed
    # main_worker processes.
    mp.spawn(main_worker, nprocs=gpu_count, args=(gpu_count, cfg))
示例6: test_torch_mp_example
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def test_torch_mp_example(self):
    """Exercise ``torch_mp.spawn`` together with a ``LocalTimerServer``.

    Eight ``_happy_function`` processes are spawned first and expected to
    finish; eight ``_stuck_function`` processes are then expected to make
    ``spawn`` raise. The timer server is stopped in a ``finally`` clause so
    it does not keep running when either step fails.
    """
    # in practice set the max_interval to a larger value (e.g. 60 seconds)
    mp_queue = mp.get_context("spawn").Queue()
    server = timer.LocalTimerServer(mp_queue, max_interval=0.01)
    server.start()

    try:
        world_size = 8

        # all processes should complete successfully
        # since start_process does NOT take context as parameter argument yet
        # this method WILL FAIL (hence the test is disabled)
        torch_mp.spawn(
            fn=_happy_function, args=(mp_queue,), nprocs=world_size, join=True
        )

        with self.assertRaises(Exception):
            # torch.multiprocessing.spawn kills all sub-procs
            # if one of them gets killed
            torch_mp.spawn(
                fn=_stuck_function, args=(mp_queue,), nprocs=world_size, join=True
            )
    finally:
        # Always stop the server, even when a spawn call or the assertion
        # above raises.
        server.stop()
示例7: run
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def run(args):
    """Load the ``CAM`` model and run ``_work`` over the dataset, one process per GPU.

    The model class is looked up in the module named by ``args.cam_network``
    and its weights are loaded from ``args.cam_weights_name + '.pth'``. The
    multi-scale dataset is split into one part per CUDA device before the
    workers are spawned.
    """
    network_module = importlib.import_module(args.cam_network)
    model = network_module.CAM()
    state_dict = torch.load(args.cam_weights_name + '.pth')
    model.load_state_dict(state_dict, strict=True)
    model.eval()

    gpu_count = torch.cuda.device_count()

    full_dataset = voc12.dataloader.VOC12ClassificationDatasetMSF(
        args.train_list,
        voc12_root=args.voc12_root,
        scales=args.cam_scales,
    )
    dataset = torchutils.split_dataset(full_dataset, gpu_count)

    # Brackets are printed around the workers' run.
    print('[ ', end='')
    multiprocessing.spawn(_work, nprocs=gpu_count, args=(model, dataset, args), join=True)
    print(']')

    torch.cuda.empty_cache()
示例8: run
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def run(args):
    """Load the ``EdgeDisplacement`` model and run ``_work`` over the dataset.

    The model class is looked up in the module named by ``args.irn_network``
    and its weights are loaded non-strictly from ``args.irn_weights_name``.
    The single-scale dataset built from ``args.infer_list`` is split into one
    part per CUDA device before the workers are spawned.
    """
    network_module = importlib.import_module(args.irn_network)
    model = network_module.EdgeDisplacement()
    state_dict = torch.load(args.irn_weights_name)
    model.load_state_dict(state_dict, strict=False)
    model.eval()

    gpu_count = torch.cuda.device_count()

    full_dataset = voc12.dataloader.VOC12ClassificationDatasetMSF(
        args.infer_list,
        voc12_root=args.voc12_root,
        scales=(1.0,),
    )
    dataset = torchutils.split_dataset(full_dataset, gpu_count)

    # Brackets are printed around the workers' run.
    print("[", end='')
    multiprocessing.spawn(_work, nprocs=gpu_count, args=(model, dataset, args), join=True)
    print("]")

    torch.cuda.empty_cache()
示例9: main
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def main():
    """Report the run settings, prepare directories and spawn ``main_worker``.

    Prints the ``NCCL_IB_DISABLE`` environment value and selected CLI options,
    creates the output and cache directories if needed, builds the
    summarizer in this process and passes it to one ``main_worker`` process
    per CUDA device.
    """
    print("NCCL_IB_DISABLE: {}".format(os.getenv("NCCL_IB_DISABLE")))
    args = parser.parse_args()

    # Echo the options that control where data is read and written.
    for option in ("quick_run", "output_dir", "data_dir", "cache_dir"):
        print("{} is {}".format(option, getattr(args, option)))

    for directory in (args.output_dir, args.cache_dir):
        os.makedirs(directory, exist_ok=True)

    gpu_count = torch.cuda.device_count()

    processor = ExtSumProcessor(model_name=args.model_name)
    summarizer = ExtractiveSummarizer(
        processor,
        args.model_name,
        args.encoder,
        args.max_pos_length,
        args.cache_dir,
    )

    worker_args = (gpu_count, summarizer, args)
    mp.spawn(main_worker, nprocs=gpu_count, args=worker_args)
示例10: test
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def test(self,
        p_model=None  # the path to load the pretrained or previously self-trained model
        ):
    """Run ``test_worker`` for the model stored at ``p_model``.

    The model id is taken from the ``/<7 hex chars>-cnn-(rgb|flow).../``
    directory component of ``p_model``. The metadata, log and visualization
    paths are derived from the directory that contains it. The method logs a
    message and returns ``None`` without testing when ``p_model`` is missing,
    is not an existing file, or holds no model id.
    """
    # Check
    if p_model is None or not is_file_here(p_model):
        self.log("Need to provide a valid model path")
        return

    # Set path
    match = re.search(r'\b/[0-9a-fA-F]{7}-cnn-(rgb|flow)[^/]*/\b', p_model)
    if match is None:
        # re.search returns None when the pattern is absent; bail out here
        # instead of failing on match.group().
        self.log("Cannot find a valid model id from the model path.")
        return
    model_id = match.group()[1:-1]  # strip the surrounding slashes
    p_root = p_model[:match.start()] + "/" + model_id + "/"
    p_metadata_test = p_root + "metadata/metadata_test.json"  # metadata path (test)
    save_log_path = p_root + "log/test.log"  # path to save log files
    save_viz_path = p_root + "viz/"  # path to save visualizations

    # Spawn processes
    n_gpu = torch.cuda.device_count()
    if False:  # self.parallel and n_gpu > 1:
        # TODO: multiple GPUs will cause an error when generating summary videos
        self.can_parallel = True
        self.log("Let's use " + str(n_gpu) + " GPUs!")
        mp.spawn(self.test_worker, nprocs=n_gpu,
                 args=(n_gpu, p_model, save_log_path, self.p_frame, save_viz_path, p_metadata_test),
                 join=True)
    else:
        self.test_worker(0, 1, p_model, save_log_path, self.p_frame, save_viz_path, p_metadata_test)
示例11: main
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def main():
    """Parse the CLI arguments and spawn one ``main_worker`` per CUDA device.

    ``args.world_size`` is multiplied by the local device count before the
    workers start.
    """
    args = parser.parse_args()
    gpu_count = torch.cuda.device_count()
    # Scale the parsed world size by the number of local devices.
    args.world_size = gpu_count * args.world_size
    worker_args = (gpu_count, args)
    mp.spawn(main_worker, nprocs=gpu_count, args=worker_args)
示例12: main
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def main():
    """Parse the CLI arguments and spawn four ``main_worker`` processes."""
    args = parser.parse_args()
    # The process count is fixed at 4 and is also passed to each worker.
    n_procs = 4
    mp.spawn(main_worker, nprocs=n_procs, args=(n_procs, args))
示例13: fit
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def fit(self,
        p_model=None,  # the path to load the pretrained or previously self-trained model
        model_id_suffix="",  # the suffix appended after the model id
        p_metadata_train="../data/split/metadata_train_split_0_by_camera.json",  # metadata path (train)
        p_metadata_validation="../data/split/metadata_validation_split_0_by_camera.json",  # metadata path (validation)
        p_metadata_test="../data/split/metadata_test_split_0_by_camera.json",  # metadata path (test)
        save_model_path="../data/saved_i3d/[model_id]/model/",  # path to save the models ([model_id] will be replaced)
        save_tensorboard_path="../data/saved_i3d/[model_id]/run/",  # path to save data ([model_id] will be replaced)
        save_log_path="../data/saved_i3d/[model_id]/log/train.log",  # path to save log files ([model_id] will be replaced)
        save_metadata_path="../data/saved_i3d/[model_id]/metadata/"  # path to save metadata ([model_id] will be replaced)
        ):
    """Create a new model id, snapshot the metadata files and run ``fit_worker``.

    The model id is the first 7 characters of a random UUID followed by
    ``-i3d-``, ``self.mode`` and ``model_id_suffix``. It replaces the
    ``[model_id]`` placeholder in the four ``save_*`` paths. With
    ``self.parallel`` set and more than one CUDA device, one ``fit_worker``
    process is spawned per device; otherwise ``fit_worker`` runs in this
    process.
    """
    # Set path
    model_id = str(uuid.uuid4())[:7] + "-i3d-" + self.mode + model_id_suffix
    save_model_path, save_tensorboard_path, save_log_path, save_metadata_path = (
        template.replace("[model_id]", model_id)
        for template in (save_model_path, save_tensorboard_path, save_log_path, save_metadata_path)
    )

    # Copy training, validation, and testing metadata
    check_and_create_dir(save_metadata_path)
    metadata_copies = (
        (p_metadata_train, "metadata_train.json"),
        (p_metadata_validation, "metadata_validation.json"),
        (p_metadata_test, "metadata_test.json"),
    )
    for source_path, target_name in metadata_copies:
        shutil.copy(source_path, save_metadata_path + target_name)

    n_gpu = torch.cuda.device_count()

    # Single-process path: parallelism is off or at most one device is present.
    if not (self.parallel and n_gpu > 1):
        self.fit_worker(0, 1, p_model, save_model_path, save_tensorboard_path, save_log_path,
                        self.p_frame, p_metadata_train, p_metadata_validation, p_metadata_test)
        return

    # Spawn processes
    self.can_parallel = True
    self.log("Let's use " + str(n_gpu) + " GPUs!")
    mp.spawn(
        self.fit_worker,
        nprocs=n_gpu,
        args=(n_gpu, p_model, save_model_path, save_tensorboard_path, save_log_path,
              self.p_frame, p_metadata_train, p_metadata_validation, p_metadata_test),
        join=True,
    )
示例14: test
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def test(self,
        p_model=None  # the path to load the pretrained or previously self-trained model
        ):
    """Run ``test_worker`` for the model stored at ``p_model``.

    The model id is taken from the ``/<7 hex chars>-i3d-(rgb|flow).../``
    directory component of ``p_model``. The metadata, log and visualization
    paths are derived from the directory that contains it. The method logs a
    message and returns ``None`` without testing when ``p_model`` is missing,
    is not an existing file, or holds no model id.
    """
    # Check
    if p_model is None or not is_file_here(p_model):
        self.log("Need to provide a valid model path")
        return

    # Set path
    match = re.search(r'\b/[0-9a-fA-F]{7}-i3d-(rgb|flow)[^/]*/\b', p_model)
    if match is None:
        # re.search returns None when the pattern is absent; bail out here
        # instead of failing on match.group().
        self.log("Cannot find a valid model id from the model path.")
        return
    model_id = match.group()[1:-1]  # strip the surrounding slashes
    p_root = p_model[:match.start()] + "/" + model_id + "/"
    p_metadata_test = p_root + "metadata/metadata_test.json"  # metadata path (test)
    save_log_path = p_root + "log/test.log"  # path to save log files
    save_viz_path = p_root + "viz/"  # path to save visualizations

    # Spawn processes
    n_gpu = torch.cuda.device_count()
    if False:  # self.parallel and n_gpu > 1:
        # TODO: multiple GPUs will cause an error when generating summary videos
        self.can_parallel = True
        self.log("Let's use " + str(n_gpu) + " GPUs!")
        mp.spawn(self.test_worker, nprocs=n_gpu,
                 args=(n_gpu, p_model, save_log_path, self.p_frame, save_viz_path, p_metadata_test),
                 join=True)
    else:
        self.test_worker(0, 1, p_model, save_log_path, self.p_frame, save_viz_path, p_metadata_test)
示例15: fit
# 需要导入模块: from torch import multiprocessing [as 别名]
# 或者: from torch.multiprocessing import spawn [as 别名]
def fit(self,
        p_model=None,  # the path to load the pretrained or previously self-trained model
        model_id_suffix="",  # the suffix appended after the model id
        p_metadata_train="../data/split/metadata_train_split_0_by_camera.json",  # metadata path (train)
        p_metadata_validation="../data/split/metadata_validation_split_0_by_camera.json",  # metadata path (validation)
        p_metadata_test="../data/split/metadata_test_split_0_by_camera.json",  # metadata path (test)
        save_model_path="../data/saved_cnn/[model_id]/model/",  # path to save the models ([model_id] will be replaced)
        save_tensorboard_path="../data/saved_cnn/[model_id]/run/",  # path to save data ([model_id] will be replaced)
        save_log_path="../data/saved_cnn/[model_id]/log/train.log",  # path to save log files ([model_id] will be replaced)
        save_metadata_path="../data/saved_cnn/[model_id]/metadata/"  # path to save metadata ([model_id] will be replaced)
        ):
    """Create a new model id, snapshot the metadata files and run ``fit_worker``.

    The model id is the first 7 characters of a random UUID followed by
    ``-cnn-``, ``self.mode`` and ``model_id_suffix``. It replaces the
    ``[model_id]`` placeholder in the four ``save_*`` paths. With
    ``self.parallel`` set and more than one CUDA device, one ``fit_worker``
    process is spawned per device; otherwise ``fit_worker`` runs in this
    process.
    """
    # Set path
    model_id = str(uuid.uuid4())[:7] + "-cnn-" + self.mode + model_id_suffix
    save_model_path, save_tensorboard_path, save_log_path, save_metadata_path = (
        template.replace("[model_id]", model_id)
        for template in (save_model_path, save_tensorboard_path, save_log_path, save_metadata_path)
    )

    # Copy training, validation, and testing metadata
    check_and_create_dir(save_metadata_path)
    metadata_copies = (
        (p_metadata_train, "metadata_train.json"),
        (p_metadata_validation, "metadata_validation.json"),
        (p_metadata_test, "metadata_test.json"),
    )
    for source_path, target_name in metadata_copies:
        shutil.copy(source_path, save_metadata_path + target_name)

    n_gpu = torch.cuda.device_count()

    # Single-process path: parallelism is off or at most one device is present.
    if not (self.parallel and n_gpu > 1):
        self.fit_worker(0, 1, p_model, save_model_path, save_tensorboard_path, save_log_path,
                        self.p_frame, p_metadata_train, p_metadata_validation, p_metadata_test)
        return

    # Spawn processes
    self.can_parallel = True
    self.log("Let's use " + str(n_gpu) + " GPUs!")
    mp.spawn(
        self.fit_worker,
        nprocs=n_gpu,
        args=(n_gpu, p_model, save_model_path, save_tensorboard_path, save_log_path,
              self.p_frame, p_metadata_train, p_metadata_validation, p_metadata_test),
        join=True,
    )