本文整理汇总了Python中torch.distributed.init_process_group方法的典型用法代码示例。如果您正苦于以下问题:Python distributed.init_process_group方法的具体用法?Python distributed.init_process_group怎么用?Python distributed.init_process_group使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类torch.distributed
的用法示例。
在下文中一共展示了distributed.init_process_group方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: setup
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup(rank, device_ids, args):
    """Join the gloo process group for this rank and launch training.

    One process is spawned per device; ``world_size`` equals the number
    of devices and ``rank`` indexes into ``device_ids``.
    """
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # Rendezvous: every worker joins the same gloo group.
    dist.init_process_group("gloo", rank=rank, world_size=len(device_ids))

    # Unpack the shared training arguments.
    (train_file, test_file, batch_size, epochs, gpu_mode, num_workers,
     retrain_model, retrain_model_path, gru_layers, hidden_size,
     learning_rate, weight_decay, model_dir, stats_dir, total_callers,
     train_mode) = args

    # Fixed seed so every process starts from identical weights/biases.
    # https://github.com/pytorch/pytorch/issues/2517
    torch.manual_seed(42)
    train(train_file, test_file, batch_size, epochs, gpu_mode, num_workers,
          retrain_model, retrain_model_path, gru_layers, hidden_size,
          learning_rate, weight_decay, model_dir, stats_dir, train_mode,
          total_callers, rank, device_ids[rank])
    cleanup()
示例2: setup
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup(rank, total_callers, args, all_input_files, all_devices):
    """Join the gloo process group and run prediction for this rank's shard."""
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # Rendezvous: one process per caller.
    dist.init_process_group("gloo", rank=rank, world_size=total_callers)

    # Unpack the shared prediction arguments.
    output_filepath, model_path, batch_size, num_workers = args

    # Each rank consumes its own input file on its own device.
    predict(all_input_files[rank], output_filepath, model_path,
            batch_size, num_workers, rank, all_devices[rank])
    cleanup()
示例3: setup
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup(rank, total_callers, args, all_input_files):
    """Join the gloo process group and run CPU-threaded prediction for this rank."""
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # Rendezvous: one process per caller.
    dist.init_process_group("gloo", rank=rank, world_size=total_callers)

    # Unpack the shared prediction arguments (thread count instead of device).
    output_filepath, model_path, batch_size, num_workers, threads = args

    # Each rank consumes its own input file.
    predict(all_input_files[rank], output_filepath, model_path,
            batch_size, num_workers, rank, threads)
    cleanup()
示例4: __init__
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def __init__(self, rank, learner_ranks, worker_ranks, ip, port):
    """Join the global NCCL process group and pre-build the pairwise
    learner<->worker groups this rank communicates over.

    NOTE(review): ``dist.new_group`` is a collective — every process must
    call it the same number of times in the same order, which is why each
    group is created unconditionally even when this rank is not a member.
    """
    # World = all learners + all workers.
    world_size = len(learner_ranks) + len(worker_ranks)
    dist.init_process_group(
        "nccl",
        init_method="tcp://{}:{}".format(ip, port),
        rank=rank,
        world_size=world_size,
    )
    groups = {}
    for learner_rank in learner_ranks:
        for worker_rank in worker_ranks:
            g = dist.new_group([learner_rank, worker_rank])
            if worker_rank == rank:
                # This rank is the worker half of the pair: keep the group
                # handle keyed by its peer learner's rank.
                groups[learner_rank] = g
    # Learner-only group: created collectively; the handle is not needed
    # on this (worker-side) object.
    dist.new_group(learner_ranks)
    self.groups = groups
    # Bind to the GPU that Ray assigned to this actor.
    self.device = torch.device(f"cuda:{ray.get_gpu_ids()[0]}")
    self.rank = rank
    self.network = torch.zeros(3).to(self.device)
    self.exp = None
    self.network_handle = None
示例5: init_process_group
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def init_process_group(backend):
    """Bootstrap torch.distributed from an MPI communicator.

    Rank 0 resolves its own address to act as the rendezvous master and
    broadcasts it; every rank then exports the standard env:// variables
    and joins the group.
    """
    comm = MPI.COMM_WORLD
    world_size = comm.Get_size()
    rank = comm.Get_rank()

    info = {}
    if rank == 0:
        # Master advertises its own resolved address.
        host = socket.gethostname()
        address = socket.gethostbyname(host)
        info['MASTER_ADDR'] = address
        info['MASTER_PORT'] = '1234'
    info = comm.bcast(info, root=0)

    # Per-rank identity for the env:// init method.
    info['WORLD_SIZE'] = str(world_size)
    info['RANK'] = str(rank)
    os.environ.update(info)
    distributed.init_process_group(backend=backend)
示例6: setup_distributed
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup_distributed(port=29500):
    """Initialize NCCL distributed training and return ``(rank, world_size)``.

    Falls back to ``(0, 1)`` (single process) when torch.distributed or
    multiple GPUs are unavailable. Under an MPICH launch the rank/size
    come from MPI; otherwise env:// variables are expected.
    """
    # Single-process fallback: nothing to set up.
    if (not dist.is_available()
            or not torch.cuda.is_available()
            or torch.cuda.device_count() <= 1):
        return 0, 1

    if 'MPIR_CVAR_CH3_INTERFACE_HOSTNAME' in os.environ:
        # Launched via MPI: derive identity from the world communicator.
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        mpi_rank, mpi_size = comm.Get_rank(), comm.Get_size()
        os.environ["MASTER_ADDR"] = '127.0.0.1'
        os.environ["MASTER_PORT"] = str(port)
        dist.init_process_group(backend="nccl", world_size=mpi_size,
                                rank=mpi_rank)
        return mpi_rank, mpi_size

    # Standard launcher path: MASTER_ADDR/PORT/RANK/WORLD_SIZE in the env.
    dist.init_process_group(backend="nccl", init_method="env://")
    return dist.get_rank(), dist.get_world_size()
示例7: spmd_main
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def spmd_main(local_world_size, local_rank):
    """SPMD entry point: join the NCCL group via env:// rendezvous,
    run ``demo_basic``, then tear the group down."""
    # The env:// init method reads these four variables.
    keys = ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE")
    env_dict = {key: os.environ[key] for key in keys}
    print(f"[{os.getpid()}] Initializing process group with: {env_dict}")

    dist.init_process_group(backend="nccl")
    print(
        f"[{os.getpid()}]: world_size = {dist.get_world_size()}, "
        + f"rank = {dist.get_rank()}, backend={dist.get_backend()}"
    )

    demo_basic(local_world_size, local_rank)

    # Tear down the process group
    dist.destroy_process_group()
示例8: setup
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup():
    """Join the gloo process group using the CLI-provided master address,
    rank, and world size (module-global ``args``)."""
    os.environ['MASTER_ADDR'] = args.master
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group("gloo", rank=args.rank, world_size=args.world_size)
    # Seeding (torch.manual_seed(42)) would make all processes start from
    # identical weights; intentionally left disabled — models can instead
    # be synced at start via a callback.
示例9: _init_dist_pytorch
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def _init_dist_pytorch(backend, **kwargs):
    """Initialize distributed training from torch launcher env vars,
    pinning this process to a GPU by ``RANK`` modulo the GPU count."""
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    torch.cuda.set_device(rank % torch.cuda.device_count())
    dist.init_process_group(backend=backend, **kwargs)
示例10: _init_dist_slurm
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def _init_dist_slurm(backend, port=29500, **kwargs):
    """Initialize distributed training inside a SLURM allocation.

    Derives rank/world size from SLURM env vars, pins this process to a
    GPU, resolves the first node of the allocation as the rendezvous
    master, and exports the env:// variables before joining.
    """
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']

    # One GPU per task, round-robin within the node.
    torch.cuda.set_device(proc_id % torch.cuda.device_count())

    # First hostname in the allocation becomes the master.
    addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))

    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
示例11: main
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def main(args):
    """Distributed test entry point: one process per GPU (NCCL, env://
    rendezvous). Loads a model snapshot and runs prediction, saving
    outputs either raw or as rendered images depending on ``args.raw``.
    """
    # Initialize multi-processing
    distributed.init_process_group(backend='nccl', init_method='env://')
    device_id, device = args.local_rank, torch.device(args.local_rank)
    rank, world_size = distributed.get_rank(), distributed.get_world_size()
    torch.cuda.set_device(device_id)
    # Initialize logging (rank 0 only)
    if rank == 0:
        logging.init(args.log_dir, "test")
    # Load configuration
    config = make_config(args)
    # Create dataloader (rank-aware, so each process gets its own shard)
    test_dataloader = make_dataloader(args, config, rank, world_size)
    meta = load_meta(args.meta)
    # Create model
    model = make_model(config, meta["num_thing"], meta["num_stuff"])
    # Load snapshot (only the listed sub-modules are restored)
    log_debug("Loading snapshot from %s", args.model)
    resume_from_snapshot(model, args.model, ["body", "rpn_head", "roi_head"])
    # Init GPU stuff
    torch.backends.cudnn.benchmark = config["general"].getboolean("cudnn_benchmark")
    model = DistributedDataParallel(model.cuda(device), device_ids=[device_id], output_device=device_id)
    # Choose how predictions are persisted: raw dumps vs. colorized images.
    if args.raw:
        save_function = partial(save_prediction_raw, out_dir=args.out_dir)
    else:
        save_function = partial(
            save_prediction_image, out_dir=args.out_dir, colors=meta["palette"],
            num_stuff=meta["num_stuff"], threshold=args.threshold)
    test(model, test_dataloader, device=device, summary=None,
         log_interval=config["general"].getint("log_interval"), save_function=save_function)
示例12: setup
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def setup(hp, rank, world_size):
    """Export the rendezvous endpoint from the hyperparameter config and
    join the process group with the configured backend."""
    os.environ["MASTER_ADDR"] = hp.train.dist.master_addr
    os.environ["MASTER_PORT"] = hp.train.dist.master_port
    backend = hp.train.dist.mode  # backend name from config — presumably "gloo"/"nccl"
    dist.init_process_group(backend, rank=rank, world_size=world_size)
示例13: main
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def main():
    """Minimal DDP training loop over random data (gloo backend, env:// vars)."""
    import torch.nn as nn
    import torch.distributed as dist
    import torch.optim as optim
    import torch.utils.data

    dist.init_process_group(backend='gloo')
    torch.manual_seed(42)

    # NOTE(review): randint with high=1 produces all-zero labels, so this
    # is effectively a smoke test rather than meaningful training — confirm
    # whether randint(0, 2, ...) was intended.
    data = torch.rand((1000, 32), dtype=torch.float32)
    labels = torch.randint(1, (1000, 10), dtype=torch.float32)

    dataset = torch.utils.data.TensorDataset(data, labels)
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=32,
                                         shuffle=False,
                                         sampler=sampler)

    model = nn.parallel.DistributedDataParallel(get_model())
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    criterion = nn.BCELoss()

    for _ in range(2):  # 2 epochs
        for batch_data, batch_labels in loader:
            outputs = model(batch_data)
            loss = criterion(outputs.squeeze(), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
示例14: main
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def main():
    """Minimal DDP training loop over Mars named tensors (gloo backend)."""
    import torch.nn as nn
    import torch.distributed as dist
    import torch.optim as optim
    import torch.utils.data
    import mars.tensor as mt
    from mars.learn.contrib.pytorch import MarsDataset, MarsDistributedSampler

    dist.init_process_group(backend='gloo')
    torch.manual_seed(42)

    # Fetch the tensors previously registered in the Mars cluster by name.
    data = mt.named_tensor(name='data')
    labels = mt.named_tensor(name='labels')

    dataset = MarsDataset(data, labels)
    sampler = MarsDistributedSampler(dataset)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=32,
                                         shuffle=False,
                                         sampler=sampler)

    model = nn.parallel.DistributedDataParallel(get_model())
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    criterion = nn.BCELoss()

    for _ in range(2):  # 2 epochs
        for batch_data, batch_labels in loader:
            outputs = model(batch_data)
            loss = criterion(outputs.squeeze(), batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
示例15: init_processes
# 需要导入模块: from torch import distributed [as 别名]
# 或者: from torch.distributed import init_process_group [as 别名]
def init_processes(backend, master_addr, master_port, rank, world_size,
                   rows, columns, host, num_gpus):
    """Export the rendezvous env vars, join the process group, and run the job."""
    # Initialize the distributed environment from explicit parameters.
    env = {
        'WORLD_SIZE': str(world_size),
        'RANK': str(rank),
        'MASTER_ADDR': master_addr,
        'MASTER_PORT': master_port,
    }
    os.environ.update(env)
    logger.info('Init process rank {} on host \'{}\''.format(rank, host))
    dist.init_process_group(backend=backend, rank=rank, world_size=world_size)
    run(backend, rank, rows, columns, num_gpus)