當前位置: 首頁>>代碼示例>>Python>>正文


Python nccl.all_sum方法代碼示例

本文整理匯總了Python中tensorflow.contrib.nccl.all_sum方法的典型用法代碼示例。如果您正苦於以下問題:Python nccl.all_sum方法的具體用法?Python nccl.all_sum怎麼用?Python nccl.all_sum使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊tensorflow.contrib.nccl的用法示例。


在下文中一共展示了nccl.all_sum方法的9個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: allreduce_grads

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def allreduce_grads(all_grads, average):
    """
    Sum gradients across K towers with ``nccl.all_sum``, optionally averaging.

    Args:
        all_grads (K x N): one list of N gradients per tower. The n-th entry
            of every list is reduced together.
        average (bool): when True, every summed gradient is multiplied by 1/K.

    Returns:
        ``all_grads`` itself when K == 1; otherwise a list of K tuples, each
        holding the N reduced gradients for one tower (same layout as input).
    """
    num_towers = len(all_grads)
    if num_towers == 1:
        # A single tower has nothing to reduce against.
        return all_grads

    per_variable = []  # N x K
    for same_var_grads in zip(*all_grads):
        per_device = []  # K
        for reduced in nccl.all_sum(same_var_grads):
            # Place the scaling op on the device that owns the reduced tensor.
            with tf.device(reduced.device):
                # tensorflow/benchmarks didn't average gradients
                scaled = (tf.multiply(reduced, 1.0 / num_towers)
                          if average else reduced)
            per_device.append(scaled)
        per_variable.append(per_device)

    # Transpose N x K back to K x N.
    return list(zip(*per_variable))
開發者ID:ratsgo,項目名稱:embedding,代碼行數:28,代碼來源:tune_utils.py

示例2: sum_grad_and_var_all_reduce

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def sum_grad_and_var_all_reduce(grad_and_vars, num_workers, alg, gpu_indices,
                                aux_devices=None, num_shards=1):
  """Apply all-reduce algorithm over specified gradient tensors.

  Args:
    grad_and_vars: sequence of (gradient, variable) pairs, e.g.
      ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)).
    num_workers: forwarded to all_reduce.build_ring_all_reduce; only used
      when alg is 'xring'.
    alg: name of the all-reduce algorithm. One of 'nccl', 'xring',
      'nccl/xring', 'nccl/rechd', 'nccl/pscpu', 'pscpu/pscpu', 'pscpu',
      'psgpu'.
    gpu_indices: forwarded to all_reduce.build_ring_all_reduce; only used
      when alg is 'xring'.
    aux_devices: forwarded to the shuffle-based builders ('nccl/pscpu',
      'pscpu/pscpu', 'pscpu', 'psgpu'). 'pscpu/pscpu' indexes
      aux_devices[0], so it must be a non-empty sequence for that algorithm.
    num_shards: forwarded to the ring builders ('xring', 'nccl/xring').

  Returns:
    A list of [summed_gradient, variable] two-element lists, in the order of
    grad_and_vars.

  Raises:
    ValueError: if alg is not one of the supported algorithm names.
  """
  # Note that each grad_and_vars looks like the following:
  #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
  scaled_grads = [g for g, _ in grad_and_vars]
  if alg == 'nccl':
    summed_grads = nccl.all_sum(scaled_grads)
  elif alg == 'xring':
    summed_grads = all_reduce.build_ring_all_reduce(
        scaled_grads, num_workers, num_shards, gpu_indices, tf.add)
  elif alg == 'nccl/xring':
    summed_grads = all_reduce.build_nccl_then_ring(scaled_grads, num_shards,
                                                   tf.add)
  elif alg == 'nccl/rechd':
    summed_grads = all_reduce.build_nccl_then_recursive_hd(scaled_grads, tf.add)
  elif alg == 'nccl/pscpu':
    summed_grads = all_reduce.build_nccl_then_shuffle(
        scaled_grads, aux_devices, tf.add, tf.add_n)
  elif alg == 'pscpu/pscpu':
    summed_grads = all_reduce.build_shuffle_then_shuffle(
        scaled_grads, aux_devices,
        # TODO(tucker): devise a way of better specifying the device set
        # for the second level.
        [aux_devices[0]],
        tf.add_n)
  elif alg in ['pscpu', 'psgpu']:
    summed_grads = all_reduce.build_shuffle_all_reduce(
        scaled_grads, aux_devices, tf.add_n)
  else:
    # Build one message string: passing `alg` as a second positional argument
    # makes str(exc) render as a tuple repr instead of a readable sentence.
    raise ValueError('unsupported all_reduce alg: %s' % (alg,))

  # Re-attach each variable to its summed gradient.
  return [[g, v] for (_, v), g in zip(grad_and_vars, summed_grads)]
開發者ID:awslabs,項目名稱:deeplearning-benchmark,代碼行數:38,代碼來源:variable_mgr.py

示例3: allreduce_grads

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def allreduce_grads(all_grads, average):
    """
    All-reduce the gradients among K devices, optionally averaging them.

    Args:
        all_grads (K x N): List of list of gradients. N is the number of variables.
        average (bool): average gradients or not. When False the gradients
            are only summed.

    Returns:
        K x N: same layout as the input, with each grad replaced by its sum
        (or average, if ``average`` is True) over the K devices. When K == 1
        the input object is returned unchanged.
    """
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads

    # Imported only after the single-tower shortcut so that one-device runs
    # do not require the NCCL contrib module to be importable.
    from tensorflow.contrib import nccl

    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            # Keep the scaling op on the device that owns the reduced tensor.
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower)
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    return list(zip(*new_all_grads))
開發者ID:alexlee-gk,項目名稱:video_prediction,代碼行數:33,代碼來源:tf_utils.py

示例4: allreduce_grads

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def allreduce_grads(all_grads, average):
    """
    All-reduce the gradients among K devices, optionally averaging them.

    Args:
        all_grads (K x N): List of list of gradients. N is the number of variables.
        average (bool): average gradients or not. When False the gradients
            are only summed.

    Returns:
        K x N: same layout as the input, with each grad replaced by its sum
        (or average, if ``average`` is True) over the K devices. When K == 1
        the input object is returned unchanged.
    """
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads

    # Resolve the NCCL module only once it is actually needed, so one-device
    # runs neither probe the TF version nor require NCCL to be importable.
    if get_tf_version_tuple() <= (1, 12):
        from tensorflow.contrib import nccl
    else:
        from tensorflow.python.ops import nccl_ops as nccl

    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            # Keep the scaling op on the device that owns the reduced tensor.
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower)
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    return list(zip(*new_all_grads))
開發者ID:microsoft,項目名稱:petridishnn,代碼行數:37,代碼來源:utils.py

示例5: _reduced_opt

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def _reduced_opt(self, tower_grads_vars):
    tower_reduced_grads_vars = []
    for grads_vars in zip(*tower_grads_vars):
      grads = [g for g, _ in grads_vars]
      reduced_grads = nccl.all_sum(grads)
      reduced_grads_vars = [(g, v) for (_, v), g in zip(grads_vars, reduced_grads)]
      tower_reduced_grads_vars.append(reduced_grads_vars)

    # Optimizier
    tower_train_ops = []
    grad_state = [list(x) for x in zip(*tower_reduced_grads_vars)]
    for device_id in xrange(self.num_gpus):
      with tf.device('/gpu:%d' % device_id):
        # Gradients of TOWER_(device_id)
        grads = grad_state[device_id]
        # Optimizer configure
        if self.optimizer == 'Momentum':
          opt = tf.train.MomentumOptimizer(self.lr, momentum=0.9)
        elif self.optimizer == 'Adam':
          opt = tf.train.AdamOptimizer(self.lr, beta1=0.5, beta2=0.999)
        # Tower train_ops
        tower_train_ops.append(opt.apply_gradients(grads))

        print('Optimizer %d has been configured.' % device_id)

    return tower_train_ops, tower_reduced_grads_vars 
開發者ID:medivhna,項目名稱:TF_Face_Toolbox,代碼行數:28,代碼來源:data_parallel.py

示例6: all_avg_gradients

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def all_avg_gradients(
        tower_gradvars, devices, param_server_device='/gpu:0', usenccl=True):
    '''Average each layer's gradients across devices.

    Args:
        tower_gradvars: one sequence of (gradient, variable) pairs per device.
        devices: device strings used to place the per-device scaling ops
            (presumably in the same order as tower_gradvars - confirm
            against the caller).
        param_server_device: device on which the mean is computed when NCCL
            is not used.
        usenccl: use nccl.all_sum when the module-level HAVE_NCCL flag is
            also truthy; otherwise fall back to a stacked reduce_mean.

    Returns:
        tower_gradvars itself when there is exactly one device; otherwise a
        list with one tuple per device, each holding that device's
        (averaged gradient, variable) pairs.
    '''
    device_count = len(devices)
    if device_count == 1:
        return tower_gradvars

    layers = []
    for layer_gradvars in zip(*tower_gradvars):
        layer_grads, layer_vars = zip(*layer_gradvars)
        if HAVE_NCCL and usenccl:
            # Note: These nccl ops _must_ be run on all devices, else deadlock
            averaged = nccl.all_sum(layer_grads)
            for position, device_name in enumerate(devices):
                # Turn the sum into a mean on the device holding the result.
                with tf.device(device_name):
                    averaged[position] *= 1. / device_count
        else:
            # One mean on the parameter-server device, shared by all towers.
            with tf.device(param_server_device):
                mean_grad = tf.reduce_mean(tf.stack(layer_grads), 0)
            averaged = [mean_grad] * device_count
        layers.append(zip(averaged, layer_vars))

    # Transpose from per-layer back to per-device.
    return list(zip(*layers))
開發者ID:avolkov1,項目名稱:keras_experiments,代碼行數:28,代碼來源:optimizers.py

示例7: allreduce_grads

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def allreduce_grads(all_grads, average=True):
    """
    REFERENCE : https://github.com/ppwwyyxx/tensorpack/blob/83e4e187af5765792408e7b7163efd4744d63628/tensorpack/graph_builder/utils.py
    All-reduce the gradients among K devices, optionally averaging them.
    Args:
        all_grads (K x N): List of list of gradients. N is the number of variables.
        average (bool): average gradients or not. When False the gradients
            are only summed.
    Returns:
        K x N: same layout as the input, with each grad replaced by its sum
        (or average, if ``average`` is True) over the K devices. When K == 1
        the input object is returned unchanged.
    """
    nr_tower = len(all_grads)
    if nr_tower == 1:
        return all_grads

    # Imported only after the single-tower shortcut so that one-device runs
    # do not require the NCCL contrib module to be importable.
    from tensorflow.contrib import nccl

    new_all_grads = []  # N x K
    for grads in zip(*all_grads):
        summed = nccl.all_sum(grads)

        grads_for_devices = []  # K
        for g in summed:
            # Keep the scaling op on the device that owns the reduced tensor.
            with tf.device(g.device):
                # tensorflow/benchmarks didn't average gradients
                if average:
                    g = tf.multiply(g, 1.0 / nr_tower, name='allreduce_avg')
            grads_for_devices.append(g)
        new_all_grads.append(grads_for_devices)

    # transpose to K x N
    return list(zip(*new_all_grads))
開發者ID:ildoonet,項目名稱:tf-mobilenet-v2,代碼行數:32,代碼來源:train_helper.py

示例8: all_avg_gradients

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def all_avg_gradients(tower_gradvars, devices, param_server_device='/gpu:0'):
    """Average gradients across towers, via NCCL when it is enabled.

    Args:
        tower_gradvars: one sequence of (gradient, variable) pairs per tower.
        devices: device strings, zipped with tower_gradvars on the NCCL path.
        param_server_device: device on which the mean is computed on the
            non-NCCL path.

    Returns:
        tower_gradvars itself when there is exactly one device. On the NCCL
        path, a list (one entry per tower) of lists of (averaged gradient,
        variable) pairs. On the fallback path, a list of tuples with the
        same layout.
    """
    if len(devices) == 1:
        return tower_gradvars

    if not (have_nccl and FLAGS.nccl):
        # Fallback: one mean per layer, computed on the parameter-server
        # device and shared by every tower.
        tower_count = len(tower_gradvars)
        per_layer = []
        for layer_gradvars in zip(*tower_gradvars):
            layer_grads, layer_vars = zip(*layer_gradvars)
            with tf.device(param_server_device):
                mean_grad = tf.reduce_mean(tf.stack(layer_grads), 0)
            per_layer.append(zip([mean_grad] * tower_count, layer_vars))
        return list(zip(*per_layer))

    # NCCL path: flatten and concatenate each tower's gradients so that a
    # single all_sum covers all of them.
    fused = []
    for dev, gradvars in zip(devices, tower_gradvars):
        with tf.device(dev):
            pieces = [tf.reshape(grad, [-1]) for (grad, _) in gradvars]
            fused.append(tf.concat(pieces, 0))

    fused_sums = nccl.all_sum(fused)

    averaged_towers = []
    for dev, fused_sum, gradvars in zip(devices, fused_sums, tower_gradvars):
        with tf.device(dev):
            # Split the fused tensor back into per-variable chunks, restore
            # each original shape, then scale the sum down to a mean.
            lengths = [tf.size(grad) for (grad, _) in gradvars]
            chunks = tf.split(fused_sum, lengths)
            tower_result = []
            for chunk, (grad, var) in zip(chunks, gradvars):
                restored = tf.reshape(chunk, tf.shape(grad))
                restored *= 1. / len(devices)
                tower_result.append((restored, var))
            averaged_towers.append(tower_result)
    return averaged_towers
開發者ID:HewlettPackard,項目名稱:dlcookbook-dlbs,代碼行數:38,代碼來源:nvcnn.py

示例9: sum_grad_and_var_all_reduce

# 需要導入模塊: from tensorflow.contrib import nccl [as 別名]
# 或者: from tensorflow.contrib.nccl import all_sum [as 別名]
def sum_grad_and_var_all_reduce(grad_and_vars,
                                num_workers,
                                alg,
                                gpu_indices,
                                aux_devices=None,
                                num_shards=1):
  """Apply all-reduce algorithm over specified gradient tensors.

  All ops are built inside the 'allreduce' name scope.

  Args:
    grad_and_vars: sequence of (gradient, variable) pairs, e.g.
      ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)).
    num_workers: forwarded to all_reduce.build_ring_all_reduce; only used
      when alg is 'xring'.
    alg: name of the all-reduce algorithm. One of 'nccl', 'xring',
      'nccl/xring', 'nccl/rechd', 'nccl/pscpu', 'pscpu/pscpu', 'pscpu',
      'psgpu'.
    gpu_indices: forwarded to all_reduce.build_ring_all_reduce; only used
      when alg is 'xring'.
    aux_devices: forwarded to the shuffle-based builders ('nccl/pscpu',
      'pscpu/pscpu', 'pscpu', 'psgpu'). 'pscpu/pscpu' indexes
      aux_devices[0], so it must be a non-empty sequence for that algorithm.
    num_shards: forwarded to the ring builders ('xring', 'nccl/xring').

  Returns:
    A list of [summed_gradient, variable] two-element lists, in the order of
    grad_and_vars.

  Raises:
    ValueError: if alg is not one of the supported algorithm names.
  """
  with tf.name_scope('allreduce'):
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
    scaled_grads = [g for g, _ in grad_and_vars]
    if alg == 'nccl':
      summed_grads = nccl.all_sum(scaled_grads)
    elif alg == 'xring':
      summed_grads = all_reduce.build_ring_all_reduce(
          scaled_grads, num_workers, num_shards, gpu_indices, tf.add)
    elif alg == 'nccl/xring':
      summed_grads = all_reduce.build_nccl_then_ring(scaled_grads, num_shards,
                                                     tf.add)
    elif alg == 'nccl/rechd':
      summed_grads = all_reduce.build_nccl_then_recursive_hd(
          scaled_grads, tf.add)
    elif alg == 'nccl/pscpu':
      summed_grads = all_reduce.build_nccl_then_shuffle(
          scaled_grads, aux_devices, tf.add, tf.add_n)
    elif alg == 'pscpu/pscpu':
      summed_grads = all_reduce.build_shuffle_then_shuffle(
          scaled_grads,
          aux_devices,
          # TODO(tucker): devise a way of better specifying the device set
          # for the second level.
          [aux_devices[0]],
          tf.add_n)
    elif alg in ['pscpu', 'psgpu']:
      summed_grads = all_reduce.build_shuffle_all_reduce(
          scaled_grads, aux_devices, tf.add_n)
    else:
      # Build one message string: passing `alg` as a second positional
      # argument makes str(exc) render as a tuple repr instead of a
      # readable sentence.
      raise ValueError('unsupported all_reduce alg: %s' % (alg,))

    # Re-attach each variable to its summed gradient.
    return [[g, v] for (_, v), g in zip(grad_and_vars, summed_grads)]
開發者ID:HewlettPackard,項目名稱:dlcookbook-dlbs,代碼行數:45,代碼來源:allreduce.py


注:本文中的tensorflow.contrib.nccl.all_sum方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。