This article collects typical usage examples of the Python method planner.RoutePlanner. If you are wondering exactly what planner.RoutePlanner does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of planner, the module in which this method is defined.
The following presents 8 code examples of planner.RoutePlanner, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
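Before the individual examples, here is a minimal sketch of the pattern they all share: the agent builds a RoutePlanner in its constructor, points it at a destination when a trip starts, and asks it for the next waypoint on every step. The route_to, next_waypoint and env.act calls reflect the planner/environment interface used in the Udacity smartcab project these snippets come from; treat them as assumptions if your planner module differs.

from planner import RoutePlanner

class SimpleAgent(object):
    """Minimal non-learning agent that just follows the planner's waypoints."""

    def __init__(self, env):
        self.env = env
        self.planner = RoutePlanner(self.env, self)  # plans a route for this agent in this environment

    def reset(self, destination=None):
        self.planner.route_to(destination)  # assumed API: point the planner at a new destination

    def update(self, t):
        waypoint = self.planner.next_waypoint()  # assumed API: 'forward', 'left', 'right' or None
        reward = self.env.act(self, waypoint)    # assumed API: execute the action, receive a reward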
Example 1: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.possible_actions = ['forward', 'left', 'right', None]
    self.possible_weights = [0, 0, 0, 0]
    self.Qvalues = {}
    self.initialQvalue = 10
    self.Qiterations = {}
    # Constants
    self.alpha = 1   # learning rate, will decrease with iterations
    self.gamma = .1  # discount factor
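Example 1 fills its Q-table lazily (an empty dict with an optimistic default of 10) and keeps a per-pair iteration count so that alpha can shrink as a (state, action) pair is revisited. A minimal sketch of how such a lookup and a count-based learning rate might work is shown below; the getQ and getAlpha helper names are hypothetical and not part of the original agent.

def getQ(self, state, action):
    # Hypothetical helper: unseen (state, action) pairs start at the optimistic initial value.
    return self.Qvalues.get((state, action), self.initialQvalue)

def getAlpha(self, state, action):
    # Hypothetical helper: decay the learning rate with the number of visits to this pair,
    # matching the "will decrease with iterations" comment above.
    visits = self.Qiterations.get((state, action), 0)
    return 1.0 / (1.0 + visits)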
Example 2: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # NOTE: this example also relies on numpy (import numpy as np) and an init_Q() helper defined elsewhere in the class
    self.Qdf = self.init_Q()
    self.past_state = 0
    self.past_reward = 0
    self.past_action = 'None'
    self.epsilon_iter = 1
    self.state_visit_hist = np.zeros(10, dtype=int)  # np.int was removed in modern NumPy; use the builtin int
Example 3: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
    super(LearningAgent, self).__init__(env)  # Set the agent in the environment
    self.planner = RoutePlanner(self.env, self)  # Create a route planner
    self.valid_actions = self.env.valid_actions  # The set of valid actions
    # Set parameters of the learning agent
    self.learning = learning  # Whether the agent is expected to learn
    self.Q = dict()  # Create a Q-table, which will be a dictionary of tuples
    self.epsilon = epsilon  # Random exploration factor
    self.alpha = alpha  # Learning factor
    ###########
    ## TO DO ##
    ###########
    # Set any additional class parameters as needed
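Example 3 only declares the learning parameters; how epsilon and the Q-table actually drive action selection is left to the rest of the class. Below is a minimal sketch of one common epsilon-greedy selection scheme using these attributes, assuming the Q-table maps a state to a dict of action values; the choose_action name and that layout are assumptions, not part of the original code.

import random

def choose_action(self, state):
    # With probability epsilon (or when not learning), explore by acting randomly;
    # otherwise exploit by picking an action with the highest learned Q-value.
    if not self.learning or random.random() < self.epsilon:
        return random.choice(self.valid_actions)
    action_values = self.Q.get(state, {})
    if not action_values:
        return random.choice(self.valid_actions)
    best = max(action_values.values())
    return random.choice([a for a, q in action_values.items() if q == best])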
Example 4: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    self.trial = 0  # trial index in [0, 99]
    self.unlocked = False
    self.trial_end = False
    # TODO: Initialize any additional variables here
    # Valid actions
    self.actions = [None, 'forward', 'left', 'right']
    # Q-Learning parameters (get_simulation_params is defined elsewhere in this project)
    # Alpha (learning rate)
    self.alpha = get_simulation_params(0)[0]  # should this decay with t too?
    # Gamma (discount factor)
    self.gamma = get_simulation_params(0)[1]
    # Epsilon (exploration rate)
    self.epsilon = get_simulation_params(0)[2]  # e.g. 0.5 for an equal chance; decays progressively with t
    self.decay_factor = 1.0
    self.Q = {}
    self.Q_default_value = 0.0  # default Q-value before any learning
    # Report
    self.total_reward = []
    self.total_penalties = []
    self.trial_reward = 0
    self.trial_penalty = 0
    self.success = 0
    self.failure = 0
    self.last_failure = 0
Example 5: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env, init_value=0, gamma=0.90, alpha=0.20, epsilon=0.10,
             discount_deadline=False, history=0):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override default color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    ## Initialize the Q-function as a dictionary (state) of dictionaries (actions)
    self.q_function = {}
    self.history = history
    if self.history > 0:
        self.init_q_function()
    ## Initial value of any (state, action) tuple is an arbitrary number
    self.init_value = init_value
    ## Discount factor gamma: 0 (myopic) vs 1 (long-term optimal)
    self.gamma = gamma
    self.discount_deadline = discount_deadline
    ## Learning rate alpha: 0 (no learning) vs 1 (consider only most recent information)
    ## NOTE: Normally, alpha decreases over time: for example, alpha = 1 / t
    self.alpha = alpha
    ## Parameter of the epsilon-greedy action selection strategy
    ## NOTE: Normally, epsilon should also be decayed by the number of trials
    self.epsilon = epsilon
    ## The trial number
    self.trial = 1
    ## The cumulative reward
    self.cumulative_reward = 0
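The NOTE comments in Example 5 point out that alpha and epsilon are normally decayed over time rather than held fixed. A minimal sketch of such schedules is shown below, assuming t is a step counter and self.trial the trial counter as in the example; the concrete formulas (1/t and a geometric decay with a floor) are illustrative choices, not taken from the original agent.

def decayed_alpha(self, t):
    # Harmonic schedule from the comment above: alpha = 1 / t (guard against t = 0).
    return 1.0 / max(t, 1)

def decayed_epsilon(self, decay_rate=0.95, min_epsilon=0.01):
    # One common choice: shrink epsilon geometrically with the number of trials,
    # but never below a small floor so some exploration always remains.
    return max(self.epsilon * (decay_rate ** self.trial), min_epsilon)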
Example 6: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # How likely are we to explore new paths?
    self.epsilon = 0.05
    # Q-learning update formula (sketched after this example):
    # https://en.wikipedia.org/wiki/Q-learning
    # A good tutorial to start with:
    # http://mnemstudio.org/path-finding-q-learning-tutorial.htm
    self.learning_rate = 0.90
    # initial Q-value
    self.default_q = 0
    # discount factor
    self.gamma = 0.10
    self.Q_values = {}
    self.prev_state = None
    self.prev_action = None
    self.prev_reward = None
    self.penalty_num = 0
    self.move_num = 0
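Example 6 links to the Q-learning update formula but only stores the previous state, action, and reward; the update itself lives elsewhere in the class. For reference, the standard one-step update the comment refers to is Q(s, a) ← Q(s, a) + α · (r + γ · max_a' Q(s', a') − Q(s, a)). Below is a minimal sketch of applying it with this example's attribute names; the learn_from_transition helper and the (state, action)-keyed dict layout are assumptions, not code from the original agent.

def learn_from_transition(self, new_state):
    # Hypothetical helper: apply the one-step Q-learning update for the previous transition.
    if self.prev_state is None or self.prev_action is None:
        return
    old_q = self.Q_values.get((self.prev_state, self.prev_action), self.default_q)
    # Best achievable Q-value from the new state over all possible actions.
    best_next = max(
        self.Q_values.get((new_state, a), self.default_q)
        for a in [None, 'forward', 'left', 'right']
    )
    target = self.prev_reward + self.gamma * best_next
    self.Q_values[(self.prev_state, self.prev_action)] = old_q + self.learning_rate * (target - old_q)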
Example 7: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # NOTE: this example also relies on numpy (import numpy as np)
    self.Q = np.zeros((9, 4))
    self.Q[1][2] = 1
    self.Q[3][3] = 1
    self.Q[6][3] = 1
    self.Q[7][1] = 1
    self.Q[0][2] = -1
    self.Q[2][2] = -1
    self.Q[4][3] = -1
    self.Q[5][3] = -1
    self.Q[8][1] = -1
    self.gamma = 0.8
    self.alpha = 0.3
    self.epsilon = 0.6
    self.epsilon_start = 0.6
    self.n_updates = 0
    # Possible states: these 9 states represent all 96 possible combinations of
    # the values of the variables used to define a state
    self.states = np.array([
        ['left', 'green', 'forward', None],
        ['left', 'green', None, None],
        ['left', 'red', None, None],
        ['right', 'green', None, None],
        ['right', 'red', None, 'forward'],
        ['right', 'red', 'left', None],
        ['right', 'red', None, None],
        ['forward', 'green', None, None],
        ['forward', 'red', None, None]
    ])
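Example 7 stores the Q-table as a 9x4 NumPy array, so each observed state has to be mapped to one of the 9 rows (and each action to one of the 4 columns) before the table can be read or updated. A minimal sketch of such a row lookup is shown below; the state_index helper name, the variable order (waypoint, light, oncoming, left), and the exact matching rule are assumptions about how the rest of this agent works, not code from the original.

def state_index(self, waypoint, light, oncoming, left):
    # Hypothetical helper: find the row of self.states matching the observed variables.
    observed = [waypoint, light, oncoming, left]
    for i, row in enumerate(self.states):
        if list(row) == observed:
            return i
    return None  # observation not covered by the 9 representative states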
Example 8: __init__
# Required module: import planner [as alias]
# Or: from planner import RoutePlanner [as alias]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    self.actions = [None, 'forward', 'left', 'right']
    self.learning_rate = 0.3
    self.state = None
    self.q = {}
    self.trips = 0