This article collects typical usage examples of the Python method tensorflow.contrib.nccl.all_sum. If you are unsure what nccl.all_sum does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore the containing module, tensorflow.contrib.nccl, for related functions.
The sections below present 9 code examples of nccl.all_sum, ordered by popularity.
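Before the examples, a minimal sketch of the underlying primitive may help (this sketch is not taken from any of the examples below; it assumes TensorFlow 1.x graph mode and at least two visible GPUs, and the tensor values are purely illustrative). nccl.all_sum takes one tensor per GPU and returns a list with one tensor per GPU, each holding the elementwise sum across all of them:

import tensorflow as tf
from tensorflow.contrib import nccl

# Build one tensor on each of two GPUs.
per_gpu = []
for i in range(2):
    with tf.device('/gpu:%d' % i):
        per_gpu.append(tf.fill([4], float(i + 1)))

# One output per input device; each equals [3., 3., 3., 3.] here.
summed = nccl.all_sum(per_gpu)

with tf.Session() as sess:
    print(sess.run(summed))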
Example 1: allreduce_grads
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def allreduce_grads(all_grads, average):
    """
    All-reduce the gradients among K devices and optionally average them.
    Results are broadcast back to all devices.

    Args:
        all_grads (K x N): list of lists of gradients. N is the number of variables.
        average (bool): whether to average the gradients.

    Returns:
        K x N: same structure as the input, but each gradient is replaced by the
        sum (or average) over the K devices.
    """
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads
    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower)
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    ret = list(zip(*new_all_grads))
    return ret
Example 2: sum_grad_and_var_all_reduce
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def sum_grad_and_var_all_reduce(grad_and_vars, num_workers, alg, gpu_indices,
                                aux_devices=None, num_shards=1):
    """Apply an all-reduce algorithm over the specified gradient tensors."""
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
    scaled_grads = [g for g, _ in grad_and_vars]
    if alg == 'nccl':
        summed_grads = nccl.all_sum(scaled_grads)
    elif alg == 'xring':
        summed_grads = all_reduce.build_ring_all_reduce(
            scaled_grads, num_workers, num_shards, gpu_indices, tf.add)
    elif alg == 'nccl/xring':
        summed_grads = all_reduce.build_nccl_then_ring(
            scaled_grads, num_shards, tf.add)
    elif alg == 'nccl/rechd':
        summed_grads = all_reduce.build_nccl_then_recursive_hd(scaled_grads, tf.add)
    elif alg == 'nccl/pscpu':
        summed_grads = all_reduce.build_nccl_then_shuffle(
            scaled_grads, aux_devices, tf.add, tf.add_n)
    elif alg == 'pscpu/pscpu':
        summed_grads = all_reduce.build_shuffle_then_shuffle(
            scaled_grads, aux_devices,
            # TODO(tucker): devise a way of better specifying the device set
            # for the second level.
            [aux_devices[0]],
            tf.add_n)
    elif alg in ['pscpu', 'psgpu']:
        summed_grads = all_reduce.build_shuffle_all_reduce(
            scaled_grads, aux_devices, tf.add_n)
    else:
        raise ValueError('unsupported all_reduce alg: ', alg)

    result = []
    for (_, v), g in zip(grad_and_vars, summed_grads):
        result.append([g, v])
    return result
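The alg string dispatches to the all_reduce helpers from the TensorFlow benchmarks code. A hedged sketch of invoking the NCCL path for a single variable replicated on two GPUs (the gradient and variable tensors, worker count, and GPU indices are illustrative assumptions):

# grad_and_vars: one (gradient, variable) pair per GPU for the same variable
grad_and_vars = [(grad_gpu0, var_gpu0), (grad_gpu1, var_gpu1)]
reduced = sum_grad_and_var_all_reduce(
    grad_and_vars, num_workers=1, alg='nccl', gpu_indices=[0, 1])
# reduced[i] is [summed_grad_on_gpu_i, var_gpu_i]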
Example 3: allreduce_grads
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def allreduce_grads(all_grads, average):
    """
    All-reduce the gradients among K devices and optionally average them.
    Results are broadcast back to all devices.

    Args:
        all_grads (K x N): list of lists of gradients. N is the number of variables.
        average (bool): whether to average the gradients.

    Returns:
        K x N: same structure as the input, but each gradient is replaced by the
        sum (or average) over the K devices.
    """
    from tensorflow.contrib import nccl
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads
    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower)
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    ret = list(zip(*new_all_grads))
    return ret
Example 4: allreduce_grads
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def allreduce_grads(all_grads, average):
    """
    All-reduce the gradients among K devices and optionally average them.
    Results are broadcast back to all devices.

    Args:
        all_grads (K x N): list of lists of gradients. N is the number of variables.
        average (bool): whether to average the gradients.

    Returns:
        K x N: same structure as the input, but each gradient is replaced by the
        sum (or average) over the K devices.
    """
    if get_tf_version_tuple() <= (1, 12):
        from tensorflow.contrib import nccl
    else:
        from tensorflow.python.ops import nccl_ops as nccl
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads
    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower)
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    ret = list(zip(*new_all_grads))
    return ret
Example 5: _reduced_opt
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def _reduced_opt(self, tower_grads_vars):
    tower_reduced_grads_vars = []
    for grads_vars in zip(*tower_grads_vars):
        grads = [g for g, _ in grads_vars]
        reduced_grads = nccl.all_sum(grads)
        reduced_grads_vars = [(g, v) for (_, v), g in zip(grads_vars, reduced_grads)]
        tower_reduced_grads_vars.append(reduced_grads_vars)

    # Optimizer
    tower_train_ops = []
    grad_state = [list(x) for x in zip(*tower_reduced_grads_vars)]
    for device_id in xrange(self.num_gpus):
        with tf.device('/gpu:%d' % device_id):
            # Gradients of TOWER_(device_id)
            grads = grad_state[device_id]
            # Optimizer configuration
            if self.optimizer == 'Momentum':
                opt = tf.train.MomentumOptimizer(self.lr, momentum=0.9)
            elif self.optimizer == 'Adam':
                opt = tf.train.AdamOptimizer(self.lr, beta1=0.5, beta2=0.999)
            # Tower train ops
            tower_train_ops.append(opt.apply_gradients(grads))
            print('Optimizer %d has been configured.' % device_id)
    return tower_train_ops, tower_reduced_grads_vars
Example 6: all_avg_gradients
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def all_avg_gradients(
        tower_gradvars, devices, param_server_device='/gpu:0', usenccl=True):
    '''Take the average of gradients across devices'''
    if len(devices) == 1:
        return tower_gradvars

    num_devices = len(devices)
    avg_gradvars = []
    for layer in zip(*tower_gradvars):
        grads_on_devices, vars_on_devices = zip(*layer)
        if HAVE_NCCL and usenccl:
            # Note: These nccl ops _must_ be run on all devices, else deadlock
            # print('ALL_AVG_GRADIENTS GRADS_ON_DEVICES:',
            #       grads_on_devices)  # DEBUG
            avg_grads_on_devices = nccl.all_sum(grads_on_devices)
            for idev, device in enumerate(devices):
                with tf.device(device):
                    avg_grads_on_devices[idev] *= 1. / num_devices
        else:
            with tf.device(param_server_device):
                avg_grad = tf.reduce_mean(tf.stack(grads_on_devices), 0)
            avg_grads_on_devices = [avg_grad] * num_devices

        avg_gradvars_on_devices = zip(*(avg_grads_on_devices, vars_on_devices))
        avg_gradvars.append(avg_gradvars_on_devices)

    return list(zip(*avg_gradvars))
Example 7: allreduce_grads
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def allreduce_grads(all_grads, average=True):
    """
    REFERENCE: https://github.com/ppwwyyxx/tensorpack/blob/83e4e187af5765792408e7b7163efd4744d63628/tensorpack/graph_builder/utils.py

    All-reduce the gradients among K devices and optionally average them.
    Results are broadcast back to all devices.

    Args:
        all_grads (K x N): list of lists of gradients. N is the number of variables.
        average (bool): whether to average the gradients.

    Returns:
        K x N: same structure as the input, but each gradient is replaced by the
        sum (or average) over the K devices.
    """
    from tensorflow.contrib import nccl
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads
    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower, name='allreduce_avg')
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    ret = list(zip(*new_all_grads))
    return ret
Example 8: all_avg_gradients
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def all_avg_gradients(tower_gradvars, devices, param_server_device='/gpu:0'):
    # Average gradients across devices. With NCCL available, each device's
    # gradients are flattened and concatenated into one contiguous tensor so a
    # single all_sum covers every variable; otherwise gradients are averaged
    # on the parameter-server device.
    if len(devices) == 1:
        return tower_gradvars

    if have_nccl and FLAGS.nccl:
        new_tower_grads = []
        contig_list = []
        for d, grad_list in zip(devices, tower_gradvars):
            with tf.device(d):
                flat_grads = [tf.reshape(g, [-1]) for (g, _) in grad_list]
                contig_grads = tf.concat(flat_grads, 0)
                contig_list.append(contig_grads)

        summed_grads = nccl.all_sum(contig_list)
        for d, s, grad_list in zip(devices, summed_grads, tower_gradvars):
            with tf.device(d):
                new_grad_list = []
                sizes = [tf.size(g) for (g, _) in grad_list]
                flat_grads = tf.split(s, sizes)
                for newg, (oldg, v) in zip(flat_grads, grad_list):
                    newg = tf.reshape(newg, tf.shape(oldg))
                    newg *= 1. / len(devices)
                    new_grad_list.append((newg, v))
            new_tower_grads.append(new_grad_list)
        return new_tower_grads
    else:
        num_devices = len(tower_gradvars)
        avg_gradvars = []
        for layer in zip(*tower_gradvars):
            grads_on_devices, vars_on_devices = zip(*layer)
            with tf.device(param_server_device):
                avg_grad = tf.reduce_mean(tf.stack(grads_on_devices), 0)
            avg_grads_on_devices = [avg_grad] * num_devices
            avg_gradvars_on_devices = zip(*(avg_grads_on_devices, vars_on_devices))
            avg_gradvars.append(avg_gradvars_on_devices)
        return list(zip(*avg_gradvars))
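Example 8 packs every per-variable gradient on a device into one contiguous tensor so that a single nccl.all_sum call covers all variables, then splits the result back. A standalone illustration of the pack/unpack step (the shapes are illustrative and this snippet is not part of the original example):

import tensorflow as tf

grads = [tf.ones([3, 3]), tf.ones([5])]                     # per-variable gradients on one device
flat = tf.concat([tf.reshape(g, [-1]) for g in grads], 0)   # pack into one 1-D tensor
sizes = [g.shape.num_elements() for g in grads]             # [9, 5]
pieces = tf.split(flat, sizes)                              # unpack after the all-reduce
restored = [tf.reshape(p, tf.shape(g)) for p, g in zip(pieces, grads)]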
Example 9: sum_grad_and_var_all_reduce
# Required import: from tensorflow.contrib import nccl [as alias]
# Or: from tensorflow.contrib.nccl import all_sum [as alias]
def sum_grad_and_var_all_reduce(grad_and_vars,
                                num_workers,
                                alg,
                                gpu_indices,
                                aux_devices=None,
                                num_shards=1):
    """Apply an all-reduce algorithm over the specified gradient tensors."""
    with tf.name_scope('allreduce'):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
        scaled_grads = [g for g, _ in grad_and_vars]
        if alg == 'nccl':
            summed_grads = nccl.all_sum(scaled_grads)
        elif alg == 'xring':
            summed_grads = all_reduce.build_ring_all_reduce(
                scaled_grads, num_workers, num_shards, gpu_indices, tf.add)
        elif alg == 'nccl/xring':
            summed_grads = all_reduce.build_nccl_then_ring(
                scaled_grads, num_shards, tf.add)
        elif alg == 'nccl/rechd':
            summed_grads = all_reduce.build_nccl_then_recursive_hd(
                scaled_grads, tf.add)
        elif alg == 'nccl/pscpu':
            summed_grads = all_reduce.build_nccl_then_shuffle(
                scaled_grads, aux_devices, tf.add, tf.add_n)
        elif alg == 'pscpu/pscpu':
            summed_grads = all_reduce.build_shuffle_then_shuffle(
                scaled_grads,
                aux_devices,
                # TODO(tucker): devise a way of better specifying the device set
                # for the second level.
                [aux_devices[0]],
                tf.add_n)
        elif alg in ['pscpu', 'psgpu']:
            summed_grads = all_reduce.build_shuffle_all_reduce(
                scaled_grads, aux_devices, tf.add_n)
        else:
            raise ValueError('unsupported all_reduce alg: ', alg)

        result = []
        for (_, v), g in zip(grad_and_vars, summed_grads):
            result.append([g, v])
        return result