This article collects typical usage examples of the torch.distributed.recv method in Python. If you have been wondering how distributed.recv works, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples from the containing module, torch.distributed.
The following shows 7 code examples of distributed.recv, sorted by popularity by default.
Example 1: receive_tensor_helper

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def receive_tensor_helper(tensor, src_rank, group, tag, num_iterations,
                          broadcast):
    dist.barrier()
    start_time = time.time()
    for i in range(num_iterations):
        if broadcast:
            dist.broadcast(tensor=tensor, group=group, src=src_rank)
        else:
            dist.recv(tensor=tensor.cpu(), src=src_rank, tag=tag)
    end_time = time.time()
    dist.barrier()
    # Assume float32 elements (4 bytes each) when computing throughput.
    size = tensor.size()[0]
    throughput = (size * 4. * num_iterations) / (
        (end_time - start_time) * 10**9)
    print("Time to receive %s MB: %.3f seconds" %
          ((size * 4.) / 10**6,
           (end_time - start_time) / num_iterations))
    print("Throughput: %.3f GB/s" % throughput)
Example 2: _send_recv

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def _send_recv(rank, rows, columns):
    source = 0
    tensor = _get_tensor(rank, rows, columns)
    logger.debug('Rank: {},\nTensor BEFORE send_recv: {}'.format(rank, tensor))
    if rank == 0:
        # Rank 0 sends its tensor to every other rank in the group.
        for i in range(1, dist.get_world_size()):
            dist.send(tensor=tensor, dst=i)
    else:
        dist.recv(tensor=tensor, src=source)
    logger.debug('Rank: {},\nTensor AFTER send_recv: {}\n'.format(rank, tensor))
    # After the exchange, every rank should hold rank 0's tensor.
    assert torch.equal(tensor, _get_tensor(source, rows, columns)), \
        'Rank {}: Tensor was not equal to rank {} tensor after send-recv.'.format(rank, source)
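This example relies on a _get_tensor helper and a logger that are not shown. A possible driver, with a hypothetical stand-in for _get_tensor (any deterministic, rank-dependent tensor works, since the assertion only compares against rank 0's tensor) and with _send_recv assumed to be in the same module, might look like this:

import logging
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _get_tensor(rank, rows, columns):
    # Hypothetical stand-in: a tensor whose values depend only on the rank.
    return torch.full((rows, columns), float(rank))


def main(rank, world_size=2, rows=4, columns=4):
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29501"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    _send_recv(rank, rows, columns)  # afterwards every rank holds rank 0's tensor
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(main, nprocs=2)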
Example 3: recv

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def recv(self, tensor, src=None):
    """Receives a tensor from an (optional) source src."""
    assert dist.is_initialized(), "initialize the communicator first"
    # Receive into a clone so the caller's input tensor is left untouched.
    result = tensor.clone()
    dist.recv(result.data, src=src, group=self.main_group)
    return result
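The surrounding communicator class is not shown here. For symmetry, a matching send wrapper in the same style would plausibly look like the sketch below; self.main_group and the method name are assumptions rather than part of the original example.

def send(self, tensor, dst):
    """Sends a tensor to destination rank dst (sketched counterpart to recv)."""
    assert dist.is_initialized(), "initialize the communicator first"
    dist.send(tensor.data, dst=dst, group=self.main_group)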
Example 4: recv

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def recv(self, tensor_name, forward_minibatch_id,
         backward_minibatch_id, backward=False):
    if backward:
        # Pick the backward receive queue for this minibatch, round-robin.
        index = (backward_minibatch_id + self.rank_in_stage) % \
            len(self.backward_receive_queues[tensor_name])
        tensor = self.backward_receive_queues[tensor_name][
            index].remove()
        return tensor
    else:
        index = self.get_messaging_index(sending=False)
        tensor = self.forward_receive_queues[tensor_name][
            index].remove()
        # Forward activations need gradients for the backward pass.
        if tensor.dtype == torch.float32:
            tensor = tensor.requires_grad_()
        return tensor
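The forward/backward receive queues above are filled by helper threads elsewhere in the runtime; this method only pops from them. As a rough illustration of that pattern, a minimal blocking queue with the remove() interface used here could look like the following sketch (the class name and the add() method are assumptions):

import threading
from collections import deque


class ReceiveQueue:
    """Minimal thread-safe queue: helper threads add() received tensors, recv() removes them."""

    def __init__(self):
        self.queue = deque()
        self.cv = threading.Condition()

    def add(self, tensor):
        # Called by a background thread after dist.recv() completes.
        with self.cv:
            self.queue.append(tensor)
            self.cv.notify()

    def remove(self):
        # Blocks until a tensor is available, then returns it in FIFO order.
        with self.cv:
            while not self.queue:
                self.cv.wait()
            return self.queue.popleft()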
Example 5: worker

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def worker():
    """Initialize the distributed environment and benchmark send/recv."""
    import torch
    import torch.distributed as dist
    from torch.multiprocessing import Process
    import numpy as np

    print("Initializing distributed pytorch")
    os.environ['MASTER_ADDR'] = str(args.master_addr)
    os.environ['MASTER_PORT'] = str(args.master_port)
    # Use TCP backend. Gloo needs nightly, where it currently fails with
    #   dist.init_process_group('gloo', rank=args.rank,
    #   AttributeError: module 'torch.distributed' has no attribute 'init_process_group'
    dist.init_process_group('tcp', rank=args.rank,
                            world_size=args.size)

    # args.size_mb MB of float32 data (250,000 floats per MB).
    tensor = torch.ones(args.size_mb*250*1000)*(args.rank+1)
    time_list = []
    outfile = 'out' if args.rank == 0 else '/dev/null'
    log = util.FileLogger(outfile)
    for i in range(args.iters):
        # print('before: rank ', args.rank, ' has data ', tensor[0])
        start_time = time.perf_counter()
        if args.rank == 0:
            dist.send(tensor=tensor, dst=1)
        else:
            dist.recv(tensor=tensor, src=0)
        elapsed_time_ms = (time.perf_counter() - start_time)*1000
        time_list.append(elapsed_time_ms)
        # print('after: rank ', args.rank, ' has data ', tensor[0])
        rate = args.size_mb/(elapsed_time_ms/1000)
        log('%03d/%d added %d MBs in %.1f ms: %.2f MB/second' % (i, args.iters, args.size_mb, elapsed_time_ms, rate))

    min = np.min(time_list)
    median = np.median(time_list)
    log(f"min: {min:8.2f}, median: {median:8.2f}, mean: {np.mean(time_list):8.2f}")
Example 6: run

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def run(self):
    _LOGGER.info("Started Running!")
    self.running = True
    while self.running:
        _LOGGER.info("Polling for message...")
        # Block until any rank sends a flat message tensor.
        dist.recv(tensor=self.m_parameter)
        # Layout: [0] sender rank, [1] message code, [2:] payload.
        self.receive(int(self.m_parameter[0].item()),
                     MessageCode(self.m_parameter[1].item()),
                     self.m_parameter[2:])
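The unpacking above implies a wire format of [sender rank, message code, payload...] in a single flat tensor. A sender-side sketch that matches this layout might look like the following; MessageCode is assumed to be an Enum, and the exact protocol in the original project may differ.

import torch
import torch.distributed as dist


def send_message(message_code, payload, dst=0):
    # Pack [sender_rank, message_code, payload...] into one flat tensor and send it.
    header = torch.tensor([dist.get_rank(), message_code.value],
                          dtype=payload.dtype)
    m_parameter = torch.cat((header, payload.flatten()))
    dist.send(tensor=m_parameter, dst=dst)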
Example 7: _recv

# Required import: from torch import distributed [as alias]
# Or: from torch.distributed import recv [as alias]
def _recv(tensor_name, src_rank, tensor_shape=None, dtype=torch.float32,
          tensor=None, tag=None, sub_process_group=None):
    """
    Receives a tensor by calling PyTorch's recv().

    The tensor is copied to the GPU prior to return.
    """
    assert tag is not None
    if tensor is None:
        assert tensor_shape is not None
        assert dtype is not None
        assert dtype != torch.float16

    if sub_process_group is not None:
        # Receive tensor shape.
        received_tensor_shape = torch.zeros(len(tensor_shape),
                                            dtype=torch.int)
        dist.broadcast(tensor=received_tensor_shape,
                       src=src_rank,
                       group=sub_process_group)
        received_tensor_shape = list(map(lambda x: int(x),
                                         received_tensor_shape))

        # Receive tensor.
        tensor = torch.zeros(received_tensor_shape, dtype=dtype).cuda()
        dist.broadcast(tensor=tensor,
                       src=src_rank,
                       group=sub_process_group)
    else:
        # Receive tensor shape.
        received_tensor_shape = torch.zeros(len(tensor_shape),
                                            dtype=torch.int)
        dist.recv(tensor=received_tensor_shape,
                  src=src_rank,
                  tag=tag)
        received_tensor_shape = list(map(lambda x: int(x),
                                         received_tensor_shape))

        # Receive tensor.
        tensor = torch.zeros(received_tensor_shape, dtype=dtype)
        dist.recv(tensor=tensor,
                  src=src_rank,
                  tag=tag)
        tensor = tensor.cuda()

    assert tensor.is_cuda
    return tensor
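A matching sender would transmit the shape first and the data second, mirroring the two branches above. The sketch below is a plausible counterpart, not the original project's _send; details such as the argument names are assumptions.

def _send(tensor, src_rank, dst_rank, tag, sub_process_group=None):
    # Send the shape first so the receiver can allocate a buffer of the right size.
    tensor_shape = torch.tensor(tensor.shape, dtype=torch.int)
    if sub_process_group is not None:
        # Collective path: broadcast shape, then data, within the sub-group.
        dist.broadcast(tensor=tensor_shape, src=src_rank, group=sub_process_group)
        dist.broadcast(tensor=tensor.contiguous(), src=src_rank, group=sub_process_group)
    else:
        # Point-to-point path: send shape, then a CPU copy of the data.
        dist.send(tensor=tensor_shape, dst=dst_rank, tag=tag)
        dist.send(tensor=tensor.detach().cpu(), dst=dst_rank, tag=tag)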