This article collects typical usage examples of the Python method torch.distributed.get_rank. If you are wondering exactly what distributed.get_rank does, how to call it, and what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the torch.distributed module this method belongs to.
The 15 code examples of distributed.get_rank shown below are drawn from open-source projects and are sorted by popularity by default.
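All of the examples assume that a default process group has already been initialized; dist.get_rank() raises a RuntimeError otherwise. For context, here is a minimal setup sketch (the torchrun launcher and the nccl backend are assumptions, not part of the examples below):

import torch.distributed as dist

def setup_distributed(backend="nccl"):
    # torchrun exports RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT;
    # init_process_group picks them up via the default env:// init method
    dist.init_process_group(backend=backend)
    print("running as rank {} of {}".format(dist.get_rank(), dist.get_world_size()))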
Example 1: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0
    self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
    self.total_size = self.num_samples * self.num_replicas
    self.shuffle = shuffle
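The snippet above only shows the constructor. For context, the matching __iter__ in samplers of this kind (following torch.utils.data.distributed.DistributedSampler; not part of the original example) shuffles with an epoch-seeded generator, pads the index list up to total_size, and then takes every num_replicas-th index starting at rank:

    def __iter__(self):
        if self.shuffle:
            g = torch.Generator()
            g.manual_seed(self.epoch)            # reshuffle differently every epoch
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = list(range(len(self.dataset)))
        indices += indices[:(self.total_size - len(indices))]           # pad to an even split
        indices = indices[self.rank:self.total_size:self.num_replicas]  # this rank's share
        return iter(indices)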
Example 2: reduce_loss_dict
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def reduce_loss_dict(loss_dict):
    """
    Reduce the loss dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    loss_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict
    with torch.no_grad():
        loss_names = []
        all_losses = []
        for k in sorted(loss_dict.keys()):
            loss_names.append(k)
            all_losses.append(loss_dict[k])
        all_losses = torch.stack(all_losses, dim=0)
        dist.reduce(all_losses, dst=0)
        if dist.get_rank() == 0:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            all_losses /= world_size
        reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
    return reduced_losses
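A hypothetical call site, to show how reduce_loss_dict fits into a training step (loss_cls, loss_reg and the dist alias are stand-ins from the surrounding training code, not part of this example):

loss_dict = {"loss_cls": loss_cls, "loss_reg": loss_reg}
sum(loss for loss in loss_dict.values()).backward()

# every rank still backpropagates its own losses; only the logged values are averaged
loss_dict_reduced = reduce_loss_dict(loss_dict)
if dist.get_rank() == 0:
    print({k: v.item() for k, v in loss_dict_reduced.items()})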
Example 3: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self,
             dataset,
             samples_per_gpu=1,
             num_replicas=None,
             rank=None):
    if num_replicas is None:
        num_replicas = get_world_size()
    if rank is None:
        rank = get_rank()
    self.dataset = dataset
    self.samples_per_gpu = samples_per_gpu
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    assert hasattr(self.dataset, 'flag')
    self.flag = self.dataset.flag
    self.group_sizes = np.bincount(self.flag)

    self.num_samples = 0
    for i, j in enumerate(self.group_sizes):
        self.num_samples += int(
            math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
                      self.num_replicas)) * self.samples_per_gpu
    self.total_size = self.num_samples * self.num_replicas
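A hypothetical way to wire this grouped sampler into a DataLoader (the class name GroupedDistributedSampler is a stand-in for whatever class this __init__ belongs to; mmdetection's DistributedGroupSampler uses the same signature):

from torch.utils.data import DataLoader

sampler = GroupedDistributedSampler(dataset, samples_per_gpu=2)
loader = DataLoader(dataset,
                    batch_size=2,      # one GPU's samples_per_gpu share per step
                    sampler=sampler,
                    num_workers=4)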
Example 4: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
    import torch.distributed as dist
    super().__init__(dataset)
    if num_replicas is None:  # pragma: no cover
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:  # pragma: no cover
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0
    self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
    self.total_size = self.num_samples * self.num_replicas
    self.shuffle = shuffle
Example 5: all_gather_stats_list
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def all_gather_stats_list(stat_list, max_size=4096):
    """
    Gather a `Statistics` list across all processes/nodes

    Args:
        stat_list(list([`Statistics`])): list of statistics objects to
            gather across all processes/nodes
        max_size(int): max buffer size to use

    Returns:
        our_stats(list([`Statistics`])): list of updated stats
    """
    # Get a list of world_size lists with len(stat_list) Statistics objects
    all_stats = all_gather_list(stat_list, max_size=max_size)

    our_rank = get_rank()
    our_stats = all_stats[our_rank]
    for other_rank, stats in enumerate(all_stats):
        if other_rank == our_rank:
            continue
        for i, stat in enumerate(stats):
            our_stats[i].update(stat, update_n_src_words=True)
    return our_stats
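The all_gather_list helper is not shown in the snippet; in OpenNMT-style code it pickles each rank's payload into a byte buffer of at most max_size bytes and all-gathers the buffers. A simplified sketch with the same effect, using torch.distributed.all_gather_object (available in PyTorch 1.8+), could look like this (an assumption, not the original implementation):

import torch.distributed as dist

def all_gather_list(data, max_size=4096):
    # max_size is kept for signature compatibility; all_gather_object
    # handles arbitrary picklable objects without a fixed buffer
    output = [None for _ in range(dist.get_world_size())]
    dist.all_gather_object(output, data)
    return output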
Example 6: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self, dataset, total_iter, batch_size, world_size=None, rank=None, last_iter=-1):
    if world_size is None:
        world_size = dist.get_world_size()
    if rank is None:
        rank = dist.get_rank()
    assert rank < world_size
    self.dataset = dataset
    self.total_iter = total_iter
    self.batch_size = batch_size
    self.world_size = world_size
    self.rank = rank
    self.last_iter = last_iter

    self.total_size = self.total_iter * self.batch_size
    self.indices = self.gen_new_list()
    self.call = 0
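gen_new_list and __iter__ are not part of the snippet. In iteration-based samplers of this style, gen_new_list typically shuffles the full index space once with a fixed seed (identical on every rank) and hands each rank a contiguous slice of total_size indices, while __iter__ resumes from last_iter so training can continue from a checkpoint. A sketch of what that can look like (an assumption, not the original code):

    def gen_new_list(self):
        np.random.seed(0)                              # same seed on every rank
        all_size = self.total_size * self.world_size
        indices = np.arange(len(self.dataset))
        num_repeat = (all_size - 1) // len(indices) + 1
        indices = np.tile(indices, num_repeat)[:all_size]
        np.random.shuffle(indices)
        beg = self.total_size * self.rank              # this rank's contiguous slice
        return indices[beg:beg + self.total_size]

    def __iter__(self):
        # skip the batches that were already consumed before last_iter
        return iter(self.indices[(self.last_iter + 1) * self.batch_size:])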
Example 7: logging
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def logging(self, msg, level=logging.INFO):
    if self.in_distributed_mode():
        msg = 'Rank {} {}'.format(dist.get_rank(), msg)
    if self.only_master_logging:
        if self.is_master_node():
            self.logger.log(level, msg)
    else:
        self.logger.log(level, msg)
Example 8: is_master_node
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def is_master_node(self):
    if self.in_distributed_mode():
        if dist.get_rank() == 0:
            return True
        else:
            return False
    else:
        return True
Example 9: get_rank
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def get_rank():
    if not dist.is_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()
Example 10: is_main_process
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def is_main_process():
    return get_rank() == 0
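A hypothetical use of these two helpers: guard anything that should happen exactly once per job, such as checkpointing or metric logging (model and output_dir are placeholders, not part of the example):

if is_main_process():
    torch.save(model.state_dict(), os.path.join(output_dir, "checkpoint.pth"))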
Example 11: reduce_dict
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.reduce(values, dst=0)
        if dist.get_rank() == 0 and average:
            # only main process gets accumulated, so only divide by
            # world_size in this case
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict
Example 12: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self, num_replicas=None, rank=None):
    if num_replicas is None:
        num_replicas = get_world_size()
    if rank is None:
        rank = get_rank()
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0
    self.extra = 0
Example 13: __init__
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def __init__(self, data_source, batch_size, num_replicas=None, rank=None, drop_last=False, epoch=0):
    super(DistributedARBatchSampler, self).__init__(data_source, batch_size, drop_last, epoch)

    # Automatically get world size and rank if not provided
    if num_replicas is None:
        num_replicas = distributed.get_world_size()
    if rank is None:
        rank = distributed.get_rank()

    self.num_replicas = num_replicas
    self.rank = rank

    tot_batches = super(DistributedARBatchSampler, self).__len__()
    self.num_batches = int(math.ceil(tot_batches / self.num_replicas))
Example 14: summarize_mp
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def summarize_mp(predictions, annotations_file, img_list, log_dir, mask=False):
    # Write partial results to file (all workers)
    rank = dist.get_rank()
    with open(path.join(log_dir, "coco_ap_{:02d}.json".format(rank)), "w") as fid:
        json.dump(predictions, fid)
    with open(path.join(log_dir, "img_list_{:02d}.json".format(rank)), "w") as fid:
        json.dump(img_list, fid)
    dist.barrier()

    # Merge results from all workers and run evaluation (only rank 0)
    if rank == 0:
        predictions = []
        img_list = []
        for i in range(dist.get_world_size()):
            coco_ap_file = path.join(log_dir, "coco_ap_{:02d}.json".format(i))
            with open(coco_ap_file) as fid:
                predictions += json.load(fid)
            remove(coco_ap_file)

            img_list_file = path.join(log_dir, "img_list_{:02d}.json".format(i))
            with open(img_list_file) as fid:
                img_list += json.load(fid)
            remove(img_list_file)

        det_map, msk_map = summarize(predictions, annotations_file, img_list, mask)
    else:
        det_map, msk_map = 0, 0

    dist.barrier()
    return det_map, msk_map
Example 15: log_debug
# Required import: from torch import distributed [as an alias]
# Or: from torch.distributed import get_rank [as an alias]
def log_debug(msg, *args, **kwargs):
    if distributed.get_rank() == 0:
        logging.get_logger().debug(msg, *args, **kwargs)
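Note that this helper assumes the default process group is already initialized; unlike the guarded get_rank() wrapper in Example 9, distributed.get_rank() raises an error when torch.distributed is unavailable or uninitialized, and logging.get_logger() here appears to be a project-specific logging module rather than the standard library. A more defensive variant (an assumption, not the original code) could fall back to plain logging:

import logging
import torch.distributed as distributed

def log_debug(msg, *args, **kwargs):
    # log on rank 0, or unconditionally when not running under torch.distributed
    if (not distributed.is_available() or not distributed.is_initialized()
            or distributed.get_rank() == 0):
        logging.getLogger(__name__).debug(msg, *args, **kwargs)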