

Python tensorflow.clip_by_norm Function Code Examples

This article collects typical usage examples of the Python function tensorflow.clip_by_norm. If you are wondering what exactly clip_by_norm does, how to call it, or what it looks like in real code, the curated examples below should help.


The following presents 15 code examples of the clip_by_norm function, drawn from open-source projects and sorted by popularity by default.
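Before diving into the collected examples, here is a minimal sketch of what tf.clip_by_norm itself computes (the values below are chosen purely for illustration): given a tensor t and a threshold clip_norm, it returns t unchanged when the L2 norm of t is at most clip_norm, and otherwise rescales t to t * clip_norm / ||t||_2.

import tensorflow as tf

t = tf.constant([3.0, 4.0])          # L2 norm is 5.0
clipped = tf.clip_by_norm(t, 2.5)    # 5.0 > 2.5, so t is scaled by 0.5 -> [1.5, 2.0]
same = tf.clip_by_norm(t, 10.0)      # 5.0 <= 10.0, so t is returned unchanged

Most of the examples below apply this op to each gradient between an optimizer's compute_gradients and apply_gradients calls, clipping each gradient's norm independently (unlike tf.clip_by_global_norm, which rescales all gradients jointly).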

Example 1: clip_by_norm

def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """
    Clip gradients by norm, under an optional name scope.

    Args:
      gvs: list of (gradient, variable) tuples
      grad_norm_thresh: norm threshold to clip to
      scope: name scope for the clip operations
    """
    def _clip(gvs):
        new_gvs = []
        for grad, var in gvs:
            # Compare against None explicitly: a tensor's truth value is ambiguous.
            if grad is not None:
                new_gvs.append((tf.clip_by_norm(grad, grad_norm_thresh), var))
            else:
                print("no gradient for %s" % var.op.name)
                new_gvs.append((grad, var))
        return new_gvs

    if scope:
        with tf.name_scope(scope):
            return _clip(gvs)
    return _clip(gvs)
Author: ulysseses | Project: sr_exp2 | Source: tools.py

Example 2: two_linear

  def two_linear( self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx ):
    """
    Make a bi-linear block with optional residual connection

    Args
      xin: the batch that enters the block
      linear_size: integer. The size of the linear units
      residual: boolean. Whether to add a residual connection
      dropout_keep_prob: float [0,1]. Probability of dropping something out
      max_norm: boolean. Whether to clip the weights to a max norm of 1
      batch_norm: boolean. Whether to apply batch normalization
      dtype: type of the weights. Usually tf.float32
      idx: integer. Index of the layer (for naming/scoping)
    Returns
      y: the batch after it leaves the block
    """

    with vs.variable_scope( "two_linear_"+str(idx) ) as scope:

      input_size = int(xin.get_shape()[1])

      # Linear 1
      w2 = tf.get_variable( name="w2_"+str(idx), initializer=kaiming, shape=[input_size, linear_size], dtype=dtype)
      b2 = tf.get_variable( name="b2_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
      w2 = tf.clip_by_norm(w2,1) if max_norm else w2
      y = tf.matmul(xin, w2) + b2
      if batch_norm:
        y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization1"+str(idx))

      y = tf.nn.relu( y )
      y = tf.nn.dropout( y, dropout_keep_prob )

      # Linear 2
      w3 = tf.get_variable( name="w3_"+str(idx), initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype)
      b3 = tf.get_variable( name="b3_"+str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
      w3 = tf.clip_by_norm(w3,1) if max_norm else w3
      y = tf.matmul(y, w3) + b3

      if batch_norm:
        y = tf.layers.batch_normalization(y,training=self.isTraining,name="batch_normalization2"+str(idx))

      y = tf.nn.relu( y )
      y = tf.nn.dropout( y, dropout_keep_prob )

      # Residual every 2 blocks
      y = (xin + y) if residual else y

    return y
Author: neherh | Project: 3d-pose-baseline | Source: linear_model.py

Example 3: __init__

  def __init__(self, sess, pred_network, env, stat, conf, target_network=None):
    super(DeepQ, self).__init__(sess, pred_network, target_network, env, stat, conf)

    # Optimizer
    with tf.variable_scope('optimizer'):
      self.targets = tf.placeholder('float32', [None], name='target_q_t')
      self.actions = tf.placeholder('int64', [None], name='action')

      actions_one_hot = tf.one_hot(self.actions, self.env.action_size, 1.0, 0.0, name='action_one_hot')
      pred_q = tf.reduce_sum(self.pred_network.outputs * actions_one_hot, reduction_indices=1, name='q_acted')

      self.delta = self.targets - pred_q
      if self.max_delta and self.min_delta:
        self.delta = tf.clip_by_value(self.delta, self.min_delta, self.max_delta, name='clipped_delta')

      self.loss = tf.reduce_mean(tf.square(self.delta), name='loss')

      self.learning_rate_op = tf.maximum(self.learning_rate_minimum,
          tf.train.exponential_decay(
              self.learning_rate,
              self.stat.t_op,
              self.learning_rate_decay_step,
              self.learning_rate_decay,
              staircase=True))

      optimizer = tf.train.RMSPropOptimizer(
        self.learning_rate_op, momentum=0.95, epsilon=0.01)
      
      grads_and_vars = optimizer.compute_gradients(self.loss)
      for idx, (grad, var) in enumerate(grads_and_vars):
        if grad is not None:
          grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var)
      self.optim = optimizer.apply_gradients(grads_and_vars)
Author: ashiqrh | Project: deep-rl-tensorflow | Source: deep_q.py

Example 4: dpg

def dpg(q_max, a_max, dqda_clipping=None, clip_norm=False, name="DpgLearning"):
  """Implements the Deterministic Policy Gradient (DPG) loss as a TensorFlow Op.

  This op implements the loss for the `actor`, the `critic` can instead be
  updated by minimizing the `value_ops.td_learning` loss.

  See "Deterministic Policy Gradient Algorithms" by Silver, Lever, Heess,
  Degris, Wierstra, Riedmiller (http://proceedings.mlr.press/v32/silver14.pdf).

  Args:
    q_max: Tensor holding Q-values generated by Q network with the input of
      (state, a_max) pair, shape `[B]`.
    a_max: Tensor holding the optimal action, shape `[B, action_dimension]`.
    dqda_clipping: `int` or `float`, clips the gradient dqda element-wise
      between `[-dqda_clipping, dqda_clipping]`.
    clip_norm: Whether to perform dqda clipping on the vector norm of the last
      dimension, or component wise (default).
    name: name to prefix ops created within this op.

  Returns:
    A namedtuple with fields:

    * `loss`: a tensor containing the batch of losses, shape `[B]`.
    * `extra`: a namedtuple with fields:
        * `q_max`: Tensor holding the optimal Q values, `[B]`.
        * `a_max`: Tensor holding the optimal action, `[B, action_dimension]`.
        * `dqda`: Tensor holding the derivative dq/da, `[B, action_dimension]`.

  Raises:
    ValueError: If `q_max` doesn't depend on `a_max` or if `dqda_clipping <= 0`.
  """

  # DPG op.
  with tf.name_scope(name, values=[q_max, a_max]):

    # Calculate the gradient dq/da.
    dqda = tf.gradients([q_max], [a_max])[0]

    # Check that `q_max` depends on `a_max`.
    if dqda is None:
      raise ValueError("q_max needs to be a function of a_max")

    # Clipping the gradient dq/da.
    if dqda_clipping is not None:
      if dqda_clipping <= 0:
        raise ValueError("dqda_clipping should be bigger than 0, {} found"
                         .format(dqda_clipping))
      if clip_norm:
        dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=-1)
      else:
        dqda = tf.clip_by_value(dqda, -1. * dqda_clipping, dqda_clipping)

    # target_a ensures the correct gradient is calculated during backprop.
    target_a = dqda + a_max
    # Stop the gradient from flowing through the Q network during backprop.
    target_a = tf.stop_gradient(target_a)
    # Gradients only flow through the actor network.
    loss = 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
    return base_ops.LossOutput(
        loss, DPGExtra(q_max=q_max, a_max=a_max, dqda=dqda))
Author: wmiao1769 | Project: trfl | Source: dpg_ops.py
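A quick note on the axes argument used in the snippet above (the numbers here are illustrative, not from the trfl source): with axes=-1, tf.clip_by_norm clips the norm of each slice along the last dimension independently, i.e. per batch element, rather than the norm of the whole tensor.

import tensorflow as tf

g = tf.constant([[3.0, 4.0],    # row norm 5.0 -> rescaled
                 [0.3, 0.4]])   # row norm 0.5 -> unchanged
row_clipped = tf.clip_by_norm(g, 1.0, axes=-1)  # -> [[0.6, 0.8], [0.3, 0.4]]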

Example 5: _init_train

    def _init_train(self):
        readout = tf.stop_gradient(self.target_network.readout)

        # 0 if terminal, max(prediction) if not
        future_rewards = tf.reduce_max(readout, reduction_indices=[1,]) * (1 - self.terminals)
        tf.histogram_summary("rewards_future", future_rewards)

        wanted = self.rewards + self.settings['discount'] * future_rewards
        tf.histogram_summary("rewards_wanted", wanted)

        current = tf.reduce_sum(
                self.act_network.readout * self.action_mask,
                reduction_indices=[1,],
                name="rewards_current"
            )
        tf.histogram_summary("rewards_current", current)

        loss = tf.square(current - wanted)
        self.error = tf.reduce_sum(loss, name="prediction_error")

        tf.scalar_summary('error', self.error)

        grad_vars = self.settings['optimizer'].compute_gradients(self.error)

        clipped_grad_vars = [(tf.clip_by_norm(grad, 10) if grad is not None else None, var)
                for (grad, var) in grad_vars]

        for grad, var in clipped_grad_vars:
            tf.histogram_summary(var.name, var)
            if grad is not None:
                tf.histogram_summary(var.name + "_clipgrad", grad)

        self.train_op = self.settings['optimizer'].apply_gradients(clipped_grad_vars, global_step=self.global_step)
Author: amharc | Project: jnp3 | Source: model.py

Example 6: make_tf_Linv

def make_tf_Linv(layer, V_shape, c_shape, lr, act=tf.nn.tanh):
  """ builds graph for layer-local training of V and c """
  with tf.name_scope('layer'+str(layer)+'_inv') as scope:

    V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.orthogonal_initializer(0.95))
    #V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32))
    c = tf.get_variable(scope+'c', shape=c_shape, dtype=tf.float32, initializer=tf.constant_initializer(0.))
    
    W = tf.placeholder(tf.float32, shape=[V_shape[1], V_shape[0]], name='W')
    b = tf.placeholder(tf.float32, shape=[1, V_shape[0]], name='b')
    x_0 = tf.placeholder(tf.float32, shape=[None, V_shape[1]], name='input')
    
    fx = act(tf.matmul(x_0, W) + b)
    loss = 0.5*tf.reduce_mean((act(tf.matmul(fx, V) + c) - x_0)**2, name='loss')  
    
    s1 = tf.summary.scalar('log_loss'+str(layer), tf.log(loss))
    s2 = tf.summary.histogram('V'+str(layer), V)
    s3 = tf.summary.histogram('c'+str(layer), c) 
    
    opt = tf.train.RMSPropOptimizer(lr)
    gvs = opt.compute_gradients(loss, var_list=[V, c])
    sg  = [tf.summary.scalar('norm_grad'+var.name[-3], tf.nn.l2_loss(grad)) for grad, var in gvs] # var.name = 'namescope/V:0' and we want just 'V'
    clipped_gvs = [(tf.clip_by_norm(grad, 100.), var) for grad, var in gvs]
    
    return opt.apply_gradients(clipped_gvs), tf.summary.merge([s1] + sg)
Author: jsseely | Project: tensorflow-target-prop | Source: tprop_train_stable.py

Example 7: create_update_op_backup

 def create_update_op_backup(self):
     optimizer = tf.train.MomentumOptimizer(self.config.learning_rate, self.config.momentum)
     #self.update_op = optimizer.minimize(self.loss)
     
     g_list = optimizer.compute_gradients(self.loss)
     
     # 000
     g_list_new = [(tf.clip_by_norm(g, 5), v) for g, v in g_list]
     # g_list_new = []
     # for g, v in g_list:
         # g_not_finite = tf.logical_or(tf.is_nan(g), tf.is_inf(g))
         
         # 001
         # g = tf.select(g_not_finite, tf.zeros_like(g), g)
         # g = tf.clip_by_norm(g, 5)
         # g = tf.select(g_not_finite, 0.1*v, g)
         
         # 002
         # g = tf.convert_to_tensor(g)
         # g_norm = tf.sqrt(tf.reduce_sum(tf.square(g)))
         # g = tf.select(g_not_finite, 0.1*v, g*5/g_norm)
         
         # g_list_new.append((g, v))
     
     self.update_op = optimizer.apply_gradients(g_list_new)
     return
Author: jjery2243542 | Project: tf_rnn | Source: tf_rnn.py

Example 8: train

def train(lr, total_loss, global_step):
    # Variables that affect learning rate.

    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
Author: danfeiX | Project: drl | Source: dqn.py

Example 9: create_variables_for_optimization

  def create_variables_for_optimization(self):
    with tf.name_scope("optimization"):
      with tf.name_scope("masker"):
        self.mask = tf.sequence_mask(self.seq_len, self.num_step)
        self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))
      if self.loss_function == "cross_entropy":
        self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                                            logits=self.logit,
                                            labels=self.actions_flatten)
      elif self.loss_function == "l2":
        self.one_hot_actions = tf.one_hot(self.actions_flatten, self.num_actions)
        self.pl_loss = tf.reduce_mean((self.probs - self.one_hot_actions) ** 2,
                                            axis=1)
      else:
          raise ValueError("loss function type is not defined")

      self.pl_loss = tf.multiply(self.pl_loss, self.mask)
      self.pl_loss = tf.reduce_mean(tf.multiply(self.pl_loss, self.returns_flatten))

      self.entropy = tf.multiply(self.entropy, self.mask)
      self.entropy = tf.reduce_mean(self.entropy)

      self.loss = self.pl_loss - self.entropy_bonus * self.entropy

      self.trainable_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="policy_network")
      self.gradients = self.optimizer.compute_gradients(self.loss, var_list=self.trainable_variables)
      self.clipped_gradients = [(tf.clip_by_norm(grad, self.max_gradient), var)
                                  for grad, var in self.gradients]
      self.train_op = self.optimizer.apply_gradients(self.clipped_gradients,
                                                     self.global_step)
      self.grad_norm = tf.global_norm([grad for grad, var in self.gradients])
      self.var_norm = tf.global_norm(self.trainable_variables)
Author: csawtelle | Project: pg_rnn | Source: pg_rnn.py

Example 10: make_accumulated_gradients

  def make_accumulated_gradients(self):
    reset_accum_grads = []
    new_grads_and_vars = []

    # 1. Prepare accum_grads
    self.accum_grads = {}
    self.add_accum_grads = {}

    for step, network in enumerate(self.networks):
      grads_and_vars = self.global_optim.compute_gradients(network.total_loss, network.w.values())
      _add_accum_grads = []

      for grad, var in tuple(grads_and_vars):
        if grad is not None:
          shape = grad.get_shape().as_list()

          name = 'accum/%s' % "/".join(var.name.split(':')[0].split('/')[-3:])
          if step == 0:
            self.accum_grads[name] = tf.Variable(
                tf.zeros(shape), trainable=False, name=name)

            global_v = global_var[re.sub(r'.*\/A3C_\d+\/', '', var.name)]
            new_grads_and_vars.append((tf.clip_by_norm(self.accum_grads[name].ref(), self.max_grad_norm), global_v))

            reset_accum_grads.append(self.accum_grads[name].assign(tf.zeros(shape)))

          _add_accum_grads.append(tf.assign_add(self.accum_grads[name], grad))

      # 2. Add gradient to accum_grads
      self.add_accum_grads[step] = tf.group(*_add_accum_grads)
Author: BinbinBian | Project: deep-rl-tensorflow | Source: n_step_q.py

Example 11: _clip_gradients

    def _clip_gradients(self, grads_and_vars):
        """Clip gradients.
        Args:
            grads_and_vars (list): list of tuples of `(grads, vars)`
        Returns:
            clipped_grads_and_vars (list): list of tuple of
                `(clipped grads, vars)`
        """
        # TODO: Optionally add gradient noise

        clipped_grads_and_vars = []

        # Clip gradient norm
        for grad, var in grads_and_vars:
            if grad is not None:
                clipped_grads_and_vars.append(
                    (tf.clip_by_norm(grad, clip_norm=self.clip_grad_norm),
                     var))

        # Clip gradient
        # for grad, var in grads_and_vars:
        #     if grad is not None:
        #         clipped_grads_and_vars.append(
        #             (tf.clip_by_value(grad,
        #                               clip_value_min=-self.clip_grad_norm,
        #                               clip_value_max=self.clip_grad_norm),
        #              var))

        # TODO: Add histograms for variables, gradients (norms)
        # self._tensorboard(trainable_vars)

        return clipped_grads_and_vars
Author: seasky100 | Project: tensorflow_end2end_speech_recognition | Source: model_base.py

Example 12: __init__

    def __init__(self, optimizer, devices, input_placeholders,
                 per_device_batch_size, build_loss, logdir,
                 grad_norm_clipping=None):
        self.optimizer = optimizer
        self.devices = devices
        self.batch_size = per_device_batch_size * len(devices)
        self.per_device_batch_size = per_device_batch_size
        self.input_placeholders = input_placeholders
        self.build_loss = build_loss
        self.logdir = logdir

        # First initialize the shared loss network
        with tf.variable_scope(TOWER_SCOPE_NAME):
            self._shared_loss = build_loss(*input_placeholders)

        # Then setup the per-device loss graphs that use the shared weights
        self._batch_index = tf.placeholder(tf.int32)

        # Split on the CPU in case the data doesn't fit in GPU memory.
        with tf.device("/cpu:0"):
            data_splits = zip(
                *[tf.split(ph, len(devices)) for ph in input_placeholders])

        self._towers = []
        for device, device_placeholders in zip(self.devices, data_splits):
            self._towers.append(self._setup_device(device,
                                                   device_placeholders))

        avg = average_gradients([t.grads for t in self._towers])
        if grad_norm_clipping:
            for i, (grad, var) in enumerate(avg):
                if grad is not None:
                    avg[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var)
        self._train_op = self.optimizer.apply_gradients(avg)
Author: adgirish | Project: ray | Source: multi_gpu_impl.py

Example 13: build_model

  def build_model(self, mode, embedding_method):
    self.build_memory()
    # self.skip_model = skip.load_model()
    self.skip_model = None
    self.reg_loss = tf.mul(tf.nn.l2_loss(self.T), self.gamma, name='regularization_loss')
    self.data_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(self.probs, self.target, name='data_loss')
    self.loss = tf.add(self.reg_loss, self.data_loss, name = 'total_loss')
    self.average_loss = tf.reduce_mean(self.loss)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)
    self.correct_prediction = tf.equal(self.target, tf.argmax(self.probs,1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

    grads_and_vars = self.opt.compute_gradients(self.loss, self.params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], 40), gv[1]) for gv in grads_and_vars]
    inc_op = self.global_step.assign_add(1)
    with tf.control_dependencies([inc_op]):
      self.apply_grad_op = self.opt.apply_gradients(clipped_grads_and_vars)

    self.saver = tf.train.Saver()

    # At Inference mode
    if mode == 'inference':
        if embedding_method == 'word2vec':
            self.saver.restore(self.sess, './demo/MN_shortcut/model.ckpt')
        elif embedding_method == 'skip':
            print('Restoring model from ./demo/MN_shortcut/skip_plot_40.ckpt')
            self.saver.restore(self.sess, './demo/MN_shortcut/skip_plot_40.ckpt')
    else:
        tf.initialize_all_variables().run()
Author: fodrh1201 | Project: demo_movieQA | Source: model.py

Example 14: adv_target_net2

def adv_target_net2(input_images, clip_norm=1.5):
    with tf.variable_scope('adv_encoder') as scope:
        width = 32
        height = 32
        batch_size = 128
        # code_length = 6000

        input_images = input_images/255

        # standardize each image: subtract the mean, divide by the standard deviation
        mean, var = tf.nn.moments(input_images, axes=tuple(range(1, len(input_images.shape))), keep_dims=True)
        normed_input_images = (input_images - mean) / tf.sqrt(var + 1e-8)

        # Convolutional layer 1
        conv1 = tf.layers.conv2d(inputs=normed_input_images,
                                 filters=32,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv1')

        # maxpool layer1
        maxpool1 = tf.layers.max_pooling2d(conv1, (3,3), (2,2), 'SAME')
        
        # Convolutional layer 2
        conv2 = tf.layers.conv2d(inputs=maxpool1,
                                 filters=64,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv2')

        # maxpool layer2
        maxpool2 = tf.layers.max_pooling2d(conv2, (3,3), (2,2), 'SAME')

        deconv1 = tf.layers.conv2d_transpose(maxpool2, 32, (5,5), (2,2), 'SAME',
                                             activation=tf.nn.leaky_relu,
                                             name='adv_deconv1')

        adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5,5), (2,2), 'SAME',
                                             activation=tf.nn.tanh,
                                             name='adv_deconv2')

        scaled_adv_mask = tf.clip_by_norm(adv_mask, clip_norm, axes=list(range(1,len(adv_mask.shape))))
        adv_images = tf.clip_by_value(scaled_adv_mask+input_images,0,1)
        output_images = tf.reshape(adv_images, (batch_size, height, width, 3)) * 255.0
        

        dif = adv_images - input_images

        tf.summary.image('adv_images', output_images)

        # Reconstruction L2 loss
        mean_square_error = tf.reduce_mean(tf.square(dif), axis=list(range(1,len(dif.shape))))
        loss = tf.reduce_mean(mean_square_error, name='dis_loss')
        
    return loss, output_images
Author: Jack-lx-jiang | Project: Adversarial-Example-Generative-Net | Source: adv_net.py

Example 15: build_model

  def build_model(self, reuse, dev, ntype):
    with tf.variable_scope(self.name) and tf.device(dev):
      if reuse:
        tf.get_variable_scope().reuse_variables()
        assert tf.get_variable_scope().reuse

      # Set inputs of networks
      self.minimap = tf.placeholder(tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
      self.screen = tf.placeholder(tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
      self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

      # Build networks
      net = build_net(self.minimap, self.screen, self.info, self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
      self.spatial_action, self.non_spatial_action, self.value = net

      # Set targets and masks
      self.valid_spatial_action = tf.placeholder(tf.float32, [None], name='valid_spatial_action')
      self.spatial_action_selected = tf.placeholder(tf.float32, [None, self.ssize**2], name='spatial_action_selected')
      self.valid_non_spatial_action = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
      self.non_spatial_action_selected = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
      self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

      # Compute log probability
      spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
      spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
      non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.non_spatial_action_selected, axis=1)
      valid_non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.valid_non_spatial_action, axis=1)
      valid_non_spatial_action_prob = tf.clip_by_value(valid_non_spatial_action_prob, 1e-10, 1.)
      non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
      non_spatial_action_log_prob = tf.log(tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
      self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
      self.summary.append(tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

      # Compute losses, more details in https://arxiv.org/abs/1602.01783
      # Policy loss and value loss
      action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
      advantage = tf.stop_gradient(self.value_target - self.value)
      policy_loss = - tf.reduce_mean(action_log_prob * advantage)
      value_loss = - tf.reduce_mean(self.value * advantage)
      self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
      self.summary.append(tf.summary.scalar('value_loss', value_loss))

      # TODO: policy penalty
      loss = policy_loss + value_loss

      # Build the optimizer
      self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
      opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
      grads = opt.compute_gradients(loss)
      clipped_grads = []
      for grad, var in grads:
        self.summary.append(tf.summary.histogram(var.op.name, var))
        if grad is None:  # some variables may receive no gradient
          continue
        self.summary.append(tf.summary.histogram(var.op.name + '/grad', grad))
        clipped_grads.append([tf.clip_by_norm(grad, 10.0), var])
      self.train_op = opt.apply_gradients(clipped_grads)
      self.summary_op = tf.summary.merge(self.summary)

      self.saver = tf.train.Saver(max_to_keep=100)
Author: fanyp17 | Project: pysc2-agents | Source: a3c_agent.py


Note: The tensorflow.clip_by_norm examples in this article were collected by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and use or redistribution must follow each project's license. Do not republish without permission.