

Python constants.ENTROPY_BETA Attribute Code Examples

This article collects typical usage examples of the constants.ENTROPY_BETA attribute in Python. If you are wondering what constants.ENTROPY_BETA is for or how to use it, the curated examples below may help. You can also explore other usage examples from the constants module in which this attribute is defined.


Six code examples of the constants.ENTROPY_BETA attribute are shown below, sorted by popularity by default.
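
To orient readers: in the projects below, ENTROPY_BETA is a hyperparameter defined in a project-level constants.py that weights the entropy-regularization term of the A3C policy loss. A hypothetical constants.py is sketched here for illustration only; the actual value differs between projects and is not shown in this article.

# Hypothetical constants.py (illustration only; 0.01 is a common A3C default,
# not necessarily the value used by any of the projects below).
ENTROPY_BETA = 0.01   # weight of the entropy bonus in the policy loss

# The examples then access it in one of two ways:
#   import constants                     ->  constants.ENTROPY_BETA
#   from constants import ENTROPY_BETA   ->  ENTROPY_BETA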

Example 1: rl_loss

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def rl_loss(self):
        with tf.variable_scope('a3c_loss'):
            action_size = self.pi.get_shape().as_list()[1]
            self.taken_action = tf.placeholder(tf.float32, [None, action_size], name='taken_action')

            # temporal difference R - V (input for the policy loss)
            self.td = tf.placeholder(tf.float32, [None], name='td_placeholder')

            # avoid NaN with clipping when value in pi becomes zero
            log_pi = tf.log(tf.clip_by_value(self.pi, 1e-20, 1.0))

            # policy entropy
            entropy = -tf.reduce_sum(self.pi * log_pi, axis=1)

            # policy loss (output). The minus sign is needed because the
            # original paper's objective is maximized with gradient ascent,
            # while we minimize it with a gradient descent optimizer.
            self.policy_loss = -tf.reduce_mean(tf.reduce_sum(
                tf.multiply(log_pi, self.taken_action), axis=1) * self.td + entropy * constants.ENTROPY_BETA)

            # R (input for value)
            self.r = tf.placeholder(tf.float32, [None], name='reward_placeholder')

            # value loss (output)
            # (The critic's learning rate is half the actor's, giving one factor
            # of 0.5; the usual 1/2 in the squared-error/Huber loss gives the
            # other, hence the factor 0.25.)
            self.value_loss = 0.25 * tf.losses.huber_loss(self.r, self.v)

            # the policy and value losses are summed, so their gradients add up
            self.rl_total_loss = self.policy_loss + self.value_loss 
Developer ID: danielgordon10, Project: thor-iqa-cvpr-2018, Lines of code: 31, Source file: rl_network.py
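
The comments in rl_loss describe how ENTROPY_BETA weights the entropy bonus inside the policy loss. The NumPy sketch below reproduces that computation on concrete numbers; the batch values and the ENTROPY_BETA of 0.01 are invented for illustration.

import numpy as np

ENTROPY_BETA = 0.01                      # assumed value, for illustration only

pi = np.array([[0.7, 0.2, 0.1],          # action probabilities for 2 states
               [0.4, 0.4, 0.2]])
taken_action = np.array([[1., 0., 0.],   # one-hot encoding of the chosen actions
                         [0., 0., 1.]])
td = np.array([1.5, -0.5])               # advantage estimates R - V

log_pi = np.log(np.clip(pi, 1e-20, 1.0))        # clip to avoid log(0)
entropy = -np.sum(pi * log_pi, axis=1)          # per-state policy entropy
policy_loss = -np.mean(np.sum(log_pi * taken_action, axis=1) * td
                       + entropy * ENTROPY_BETA)
print(policy_loss)   # a larger ENTROPY_BETA pushes the policy toward higher entropy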

Example 2: __init__

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device,
               network_scope="network",
               scene_scope="scene",
               task_scope="task"):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    self.network_scope = network_scope
    self.scene_scope = scene_scope
    self.task_scope = task_scope
    self.scopes = [network_scope, scene_scope, task_scope]

    self.local_network = ActorCriticFFNetwork(
                           action_size=ACTION_SIZE,
                           device=device,
                           network_scope=network_scope,
                           scene_scopes=[scene_scope])

    self.local_network.prepare_loss(ENTROPY_BETA, self.scopes)

    self.trainer = AccumTrainer(device)
    self.trainer.prepare_minimize(self.local_network.total_loss,
                                  self.local_network.get_vars())

    self.accum_gradients = self.trainer.accumulate_gradients()
    self.reset_gradients = self.trainer.reset_gradients()

    accum_grad_names = [self._local_var_name(x) for x in self.trainer.get_accum_grad_list()]
    global_net_vars = [x for x in global_network.get_vars() if self._get_accum_grad_name(x) in accum_grad_names]

    self.apply_gradients = grad_applier.apply_gradients(
      global_net_vars, self.trainer.get_accum_grad_list() )

    self.sync = self.local_network.sync_from(global_network)

    self.env = None

    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0
    self.episode_length = 0
    self.episode_max_q = -np.inf 
Developer ID: yushu-liu, Project: icra2017-visual-navigation, Lines of code: 55, Source file: training_thread.py
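
Examples 2 through 6 do not build the loss inline; they pass ENTROPY_BETA to a prepare_loss method on the local network, which constructs an A3C loss of the same shape as Example 1. The sketch below shows roughly what such a method computes. The placeholder names and the L2 value loss are assumptions for illustration, not the exact code of any of these projects; self.pi, self.v and self._action_size are assumed to be created by the network class.

def prepare_loss(self, entropy_beta):
    # Sketch only: mirrors the structure of the a3c_loss scope in Example 1.
    self.a = tf.placeholder(tf.float32, [None, self._action_size])  # taken actions, one-hot
    self.td = tf.placeholder(tf.float32, [None])                    # advantage R - V
    self.r = tf.placeholder(tf.float32, [None])                     # discounted return

    log_pi = tf.log(tf.clip_by_value(self.pi, 1e-20, 1.0))          # avoid log(0)
    entropy = -tf.reduce_sum(self.pi * log_pi, axis=1)              # policy entropy

    self.policy_loss = -tf.reduce_sum(
        tf.reduce_sum(log_pi * self.a, axis=1) * self.td + entropy * entropy_beta)
    self.value_loss = 0.5 * tf.nn.l2_loss(self.r - self.v)          # critic loss
    self.total_loss = self.policy_loss + self.value_loss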

Example 3: __init__

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def __init__(self,
               thread_index,
               global_network,
               pinitial_learning_rate,
               plearning_rate_input,
               pgrad_applier,
               vinitial_learning_rate,
               vlearning_rate_input,
               vgrad_applier,
               max_global_time_step,
               device,task_index=""):

    self.thread_index = thread_index
    self.plearning_rate_input = plearning_rate_input
    self.vlearning_rate_input = vlearning_rate_input
    self.max_global_time_step = max_global_time_step
    self.game_state = GameState()
    state = self.game_state.reset()
    self.game_state.reset_gs(state)
    self.action_size = self.game_state.action_size
    self.state_size = self.game_state.state_size
    self.local_max_iter = self.game_state.local_max_iter

    if USE_LSTM:
      self.local_network = GameACLSTMNetwork(self.action_size,self.state_size,self.game_state.action_low,self.game_state.action_high, thread_index, device)
    else:
      self.local_network = GameACFFNetwork(self.action_size,self.state_size,self.game_state.action_low,self.game_state.action_high, thread_index, device)

    self.local_network.prepare_loss(ENTROPY_BETA)

    with tf.device(device):
      pvar_refs = [v._ref() for v in self.local_network.get_pvars()]
      self.policy_gradients = tf.gradients(
        self.local_network.policy_loss, pvar_refs,
        gate_gradients=False,
        aggregation_method=None,
        colocate_gradients_with_ops=False)
      vvar_refs = [v._ref() for v in self.local_network.get_vvars()]
      self.value_gradients = tf.gradients(
        self.local_network.value_loss, vvar_refs,
        gate_gradients=False,
        aggregation_method=None,
        colocate_gradients_with_ops=False)

    self.apply_policy_gradients = pgrad_applier.apply_gradients(
      self.local_network.get_pvars(),
      self.policy_gradients )
    self.apply_value_gradients = vgrad_applier.apply_gradients(
      self.local_network.get_vvars(),
      self.value_gradients )
    
    self.local_t = 0

    self.pinitial_learning_rate = pinitial_learning_rate
    self.vinitial_learning_rate = vinitial_learning_rate

    self.episode_reward = 0

    # variable controlling log output
    self.prev_local_t = 0 
Developer ID: jsikyoon, Project: a3c-distributed_tensorflow, Lines of code: 62, Source file: a3c_training_thread.py

Example 4: __init__

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device,task_index=""):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    if USE_LSTM:
      self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
    else:
      self.local_network = GameACFFNetwork(ACTION_SIZE, thread_index, device)

    self.local_network.prepare_loss(ENTROPY_BETA)

    with tf.device(device):
      var_refs = [v._ref() for v in self.local_network.get_vars()]
      self.gradients = tf.gradients(
        self.local_network.total_loss, var_refs,
        gate_gradients=False,
        aggregation_method=None,
        colocate_gradients_with_ops=False)

    if global_network:
      self.apply_gradients = grad_applier.apply_gradients(
        global_network.get_vars(),
        self.gradients )
      self.sync = self.local_network.sync_from(global_network)
      self.mode = "threading"
    else:
      self.apply_gradients = grad_applier.apply_gradients(
        self.local_network.get_vars(),
        self.gradients )
      self.mode = "dist_tensor"
    if not task_index:
      self.game_state = GameState(113 * thread_index)
    else:
      self.game_state = GameState(113 * task_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0

    # variable controlling log output
    self.prev_local_t = 0 
Developer ID: jsikyoon, Project: a3c-distributed_tensorflow, Lines of code: 54, Source file: a3c_training_thread.py

Example 5: __init__

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    if USE_LSTM:
      self.local_network = GameACLSTMNetwork(ACTION_SIZE, thread_index, device)
    else:
      self.local_network = GameACFFNetwork(ACTION_SIZE, thread_index, device)

    self.local_network.prepare_loss(ENTROPY_BETA)

    with tf.device(device):
      var_refs = [v._ref() for v in self.local_network.get_vars()]
      self.gradients = tf.gradients(
        self.local_network.total_loss, var_refs,
        gate_gradients=False,
        aggregation_method=None,
        colocate_gradients_with_ops=False)

    self.apply_gradients = grad_applier.apply_gradients(
      global_network.get_vars(),
      self.gradients )
      
    self.sync = self.local_network.sync_from(global_network)
    
    self.game_state = GameState(113 * thread_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0

    # variable controlling log output
    self.prev_local_t = 0 
Developer ID: miyosuda, Project: async_deep_reinforce, Lines of code: 46, Source file: a3c_training_thread.py

Example 6: __init__

# Required module import: import constants [as alias]
# Or: from constants import ENTROPY_BETA [as alias]
def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device,FLAGS="",task_index=""):

    self.thread_index = thread_index
    self.task_index = task_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step
    self.limit_global_time_step = 100 * 10**6

    if FLAGS.use_lstm:
      self.local_network = GameACPathNetLSTMNetwork(ACTION_SIZE, thread_index, device,FLAGS)
    else:
      self.local_network = GameACPathNetNetwork(ACTION_SIZE, thread_index, device,FLAGS)
    
    self.local_network.prepare_loss(ENTROPY_BETA)
    with tf.device(device):
      var_refs = [v._ref() for v in self.local_network.get_vars()]
      self.gradients = tf.gradients(
        self.local_network.total_loss, var_refs,
        gate_gradients=False,
        aggregation_method=None,
        colocate_gradients_with_ops=False)

    self.apply_gradients = grad_applier.apply_gradients(
      self.local_network.get_vars(),
      self.gradients )

    self.game_state = GameState(113 * task_index)
    
    self.local_t = 0

    self.initial_learning_rate = initial_learning_rate

    self.episode_reward = 0

    # variable controlling log output
    self.prev_local_t = 0 
Developer ID: jsikyoon, Project: pathnet, Lines of code: 45, Source file: a3c_training_thread.py


Note: The constants.ENTROPY_BETA attribute examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors; for redistribution and use, please refer to the corresponding projects' licenses. Do not reproduce this article without permission.