

Python tensor.icol Function Code Examples

This article collects typical usage examples of the Python function theano.tensor.icol. If you are wondering what icol does, how to call it, or what it looks like in real code, the hand-picked examples below should help.


Fifteen code examples of the icol function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
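
Before looking at the examples, here is a minimal sketch, not taken from any of the projects below, of what T.icol itself provides: a symbolic int32 matrix with exactly one column, whose second dimension is broadcastable. This is why the examples pair it with shared variables declared with broadcastable=(False, True) and substitute them through givens.

import numpy as np
import theano
import theano.tensor as T

# T.icol declares a symbolic int32 column matrix, i.e. shape (n, 1)
# with a broadcastable second dimension.
actions = T.icol('actions')
print(actions.dtype)          # int32
print(actions.broadcastable)  # (False, True)

# It behaves like any other tensor variable inside a compiled function.
double = theano.function([actions], actions * 2)
print(double(np.array([[0], [2], [1]], dtype='int32')))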

Example 1: __init__

	def __init__(self, args):
		reward = T.col('r')
		action = T.icol('a')
		terminal = T.icol('t')
		discount = T.scalar('gamma')
		learningRate = T.scalar('lr')
		rho = T.scalar('rho')
		epsilon = T.scalar('eps')
		rng = np.random.RandomState(42)
		
		self.batchNb = args.batchSize
		
		#convLayers = [[(8,8),(4,4),64],
		#			  [(4,4),(2,2),128],
		#			  [(3,3),(1,1),256],
		#			  [(3,3),(1,1),512]]
		#fcl = [1024, 6]
		
		convLayers = [[(8,8),(4,4),64],
					  [(4,4),(2,2),128],
					  [(3,3),(1,1),256],
					  [(3,3),(1,1),256]]
		fcl = [1024, args.actionNb]
		self.q1 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
		self.q2 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
		self.q2.setParams(self.q1)
		
		self.states = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
		self.states2 = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
		self.actions = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
		self.rewards = theano.shared(np.zeros((args.batchSize,1), dtype='float32'), broadcastable=(False,True))
		self.terminals = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
		
		self.learningRate = theano.shared(np.array(args.learningRate, dtype='float32'))
		self.rho = theano.shared(np.array(args.rmsPropRho, dtype='float32'))
		self.epsilon = theano.shared(np.array(args.rmsPropEpsilon, dtype='float32'))
		self.discount = theano.shared(np.array(args.discountFactor, dtype='float32'))
		
		loss = self.QLoss(self.q1.output, self.q2.output, action, reward, terminal, discount)
		
		params = self.q1.getParams()
		
		updates = self.rmsProp(loss, params, rho, epsilon, learningRate)
		self.train_model = theano.function(
			[],
			loss,
			updates=updates,
			givens = { 
					   self.q1.input: self.states,
					   self.q2.input: self.states2,
					   action: self.actions,
					   reward: self.rewards,
					   terminal: self.terminals,
					   discount: self.discount,
					   learningRate: self.learningRate,
					   rho: self.rho,
					   epsilon: self.epsilon
					 }
		)
Developer: Levoila, Project: CrappyAI, Lines: 59, Source: net.py

Example 2: __init__

 def __init__(self, lenW, dimW, dimS):
     self.W = th.shared(np.random.randn(lenW, dimW))
     self.Uw = th.shared(np.random.randn(dimW, dimS))
     self.Us = th.shared(np.random.randn(dimS, dimS))
     self.V = th.shared(np.random.randn(dimS, lenW))
     self.S0 = th.shared(np.random.randn(dimS,))
     self.idx = T.icol()
     self.w = self.W[self.idx].reshape((self.idx.shape[0], self.W.shape[1]))
     def recurrence(w, s):
         # import ipdb; ipdb.set_trace()
         s1 = T.nnet.sigmoid(T.dot(w, self.Uw))
         s2 = T.nnet.sigmoid(T.dot(s, self.Us))
         ss = s1 + s2
         pp = T.dot(s, self.V)
         return [ss, pp]
     [self.S, self.PP], _ = th.scan(fn=recurrence, sequences=self.w, outputs_info=[self.S0, None], n_steps=self.w.shape[0])
     self.P = T.nnet.softmax(self.PP)
     self.RP = self.P[T.arange(self.w.shape[0]), self.idx[:,0]]
     self.cost = -T.sum(T.log(self.RP))
     self.params = [self.W, self.Uw, self.Us, self.V, self.S0]
     self.grads = T.grad(self.cost, self.params)
     self.lr = T.scalar()
     self.updates = [(param, param - self.lr * grad) for param, grad in zip(self.params, self.grads)]
     self.train_fn = th.function([self.idx, self.lr], [self.cost], updates=self.updates, allow_input_downcast=True)
     self.fprop = th.function([self.idx], [self.S, self.P, self.cost], allow_input_downcast=True)
Developer: sherjilozair, Project: daedalus, Lines: 25, Source: elman.py

Example 3: __init__

    def __init__(self, input_width, input_height, output_dim, num_frames, batch_size):
        self.input_width = input_width
        self.input_height = input_height
        self.output_dim = output_dim
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.gamma = 0.99 # discount factor
        self.rho = 0.99
        self.lr = 0.00025 # learning rate
        self.momentum = 0.95
        self.freeze_targets = True

        self.l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
        if self.freeze_targets:
            self.next_l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
#        terminals = T.icol('terminals')

        self.states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
        self.next_states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros((batch_size, 1), dtype=theano.config.floatX), broadcastable=(False,True))
        self.actions_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False,True))
#        self.terminals_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False,True))

        q_vals = self.l_out.get_output(states / 255.0)
        if self.freeze_targets:
            next_q_vals = self.next_l_out.get_output(next_states / 255.0)
        else:
            next_q_vals = self.l_out.get_output(next_states / 255.0)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        target = rewards + self.gamma * T.max(next_q_vals, axis=1, keepdims=True)
        diff = target - q_vals[T.arange(batch_size), actions.reshape((-1,))].reshape((-1,1))
        loss = T.mean(diff ** 2)

        params = lasagne.layers.helper.get_all_params(self.l_out)
        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
#            terminals: self.terminals_shared
        }
        if self.momentum > 0:
            updates = rmsprop_nesterov(loss, params, self.lr, self.rho, self.momentum, 1e-2)
        else:
            updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho, 1e-6)
        self._train = theano.function([], [loss, q_vals], updates=updates, givens=givens)
        self._q_vals = theano.function([], q_vals, givens={ states: self.states_shared })
Developer: npow, Project: deep_q_rl, Lines: 54, Source: network.py

Example 4: _create_network

    def _create_network(self):
        logger.info("Building network ...")
        net, input_var = self._build_network()
        target_values = T.matrix('target_output')
        actions = T.icol('actions')

        # Create masks
        # mask = theano.shared(np.zeros((self.batch_size, self.num_actions)).astype(np.int32))
        mask = T.zeros_like(target_values)
        mask = T.set_subtensor(mask[T.arange(self.batch_size), actions.reshape((-1,))], 1)

        # feed-forward path
        network_output = lasagne.layers.get_output(net, input_var / 255.0)

        # Add regularization penalty
        loss = squared_error(network_output * mask, target_values).mean()
        if self.weight_decay > 0.0:
            loss += regularize_network_params(net, l2) * self.weight_decay

        # Retrieve all parameters from the network
        all_params = lasagne.layers.get_all_params(net, trainable=True)

        # Compute updates for training
        if self.clip_error:
            grads = theano.gradient.grad(loss, all_params)
            grads = [lasagne.updates.norm_constraint(grad, self.clip_error, range(grad.ndim)) for grad in grads]
            updates = self.optimizer(grads, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)
        else:
            updates = self.optimizer(loss, all_params, learning_rate=self.learning_rate, rho=self.decay_rate)

        # Theano functions for training and computing cost
        logger.info("Compiling functions ...")
        train = theano.function([input_var, target_values, actions], [loss, network_output, target_values, mask], updates=updates)
        predict = theano.function([input_var], network_output)

        return net, train, predict
Developer: nikolaypavlov, Project: simple_dqn, Lines: 36, Source: deep_q_network.py

Example 5: __init__

    def __init__(self, input_width, input_height, num_actions,
                 num_frames, discount, learning_rate, rho,
                 rms_epsilon, momentum, clip_delta, freeze_interval,
                 batch_size, update_rule,
                 batch_accumulator, state_count, input_scale=255.0):
                     
        self.state_count=state_count
        self.input_width = input_width
        self.input_height = input_height
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval

        self.update_counter = 0
        
        self.l_out = self.build_nature_network_dnn(input_width, input_height,
                                        num_actions, num_frames, batch_size)
        
        if self.freeze_interval > 0:
            self.next_l_out = self.build_nature_network_dnn(input_width,
                                                 input_height, num_actions,
                                                 num_frames, batch_size)
            self.reset_q_hat()

        states = T.matrix('states')
        next_states = T.matrix('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

# buffer of inputs for the whole batch
        self.states_shared = theano.shared(
            np.zeros((batch_size, state_count),
                     dtype=theano.config.floatX))

# buffer for the state each transition ends up in
        self.next_states_shared = theano.shared(
            np.zeros((batch_size, state_count),
                     dtype=theano.config.floatX))

# one reward per episode, so what about the individual actions?
        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

# one chosen action per episode
        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

# ?? probably 0 and 1, indicating whether this is the last value or not
        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

# takes the q_vals and next q_vals and returns the differences for the batch, all only for the first pass

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                    next_states / input_scale)
        else:
            next_q_vals = lasagne.layers.get_output(self.l_out,
                                                    next_states / input_scale)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        target = (rewards +
                  (T.ones_like(terminals) - terminals) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        diff = target - q_vals[T.arange(batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

# unclear
        if self.clip_delta > 0:
            diff = diff.clip(-self.clip_delta, self.clip_delta)

        if batch_accumulator == 'sum':
            loss = T.sum(diff ** 2)
        elif batch_accumulator == 'mean':
            loss = T.mean(diff ** 2)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))


#
        params = lasagne.layers.helper.get_all_params(self.l_out)
        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
#......... part of the code omitted here .........
Developer: navd, Project: AlgoTrading, Lines: 101, Source: q_network.py

Example 6: initialize_network

    def initialize_network(self):
        """
        :description: this method initializes the network, updates, and theano functions for training and 
            retrieving q values. Here's an outline: 

            1. build the q network and target q network
            2. initialize theano symbolic variables used for compiling functions
            3. initialize the theano numeric variables used as input to functions
            4. formulate the symbolic loss 
            5. formulate the symbolic updates 
            6. compile theano functions for training and for getting q_values
        """
        batch_size, input_shape = self.batch_size, self.input_shape
        lasagne.random.set_rng(self.rng)

        # 1. build the q network and target q network
        self.l_out = self.build_network(input_shape, self.num_actions, batch_size)
        self.next_l_out = self.build_network(input_shape, self.num_actions, batch_size)
        self.reset_target_network()

        # 2. initialize theano symbolic variables used for compiling functions
        states = T.tensor4('states')
        actions = T.icol('actions')
        rewards = T.col('rewards')
        next_states = T.tensor4('next_states')
        # terminals are used to indicate a terminal state in the episode and hence a mask over the future
        # q values i.e., Q(s',a')
        terminals = T.icol('terminals')

        # 3. initialize the theano numeric variables used as input to functions
        self.states_shape = (batch_size,) + (1,) + input_shape
        self.states_shared = theano.shared(np.zeros(self.states_shape, dtype=theano.config.floatX))
        self.next_states_shared = theano.shared(np.zeros(self.states_shape, dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros((batch_size, 1), dtype=theano.config.floatX), 
            broadcastable=(False, True))
        self.actions_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))
        self.terminals_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        # 4. formulate the symbolic loss 
        q_vals = lasagne.layers.get_output(self.l_out, states)
        next_q_vals = lasagne.layers.get_output(self.next_l_out, next_states)
        target = (rewards +
                 (T.ones_like(terminals) - terminals) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        # reshape((-1,)) == 'make a row vector', reshape((-1, 1) == 'make a column vector'
        diff = target - q_vals[T.arange(batch_size), actions.reshape((-1,))].reshape((-1, 1))


        # a lot of the deepmind work clips the td error at 1 so we do that here
        # the problem is that gradient backpropagating through this minimum node
        # will be zero if diff is larger then 1.0 (because changing params before
        # the minimum does not impact the output of the minimum). To account for 
        # this we take the part of the td error (magnitude) greater than 1.0 and simply
        # add it to the loss, which allows gradient to backprop but just linearly
        # in the td error rather than quadratically
        quadratic_part = T.minimum(abs(diff), 1.0)
        linear_part = abs(diff) - quadratic_part
        loss = 0.5 * quadratic_part ** 2 + linear_part
        loss = T.mean(loss) + self.regularization * regularize_network_params(self.l_out, l2)

        # 5. formulate the symbolic updates 
        params = lasagne.layers.helper.get_all_params(self.l_out)  
        updates = self.initialize_updates(self.update_rule, loss, params, self.learning_rate)

        # 6. compile theano functions for training and for getting q_values
        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        self._train = theano.function([], [loss, q_vals], updates=updates, givens=givens)
        self._get_q_values = theano.function([], q_vals, givens={states: self.states_shared})
Developer: gandalfvn, Project: hierarchical_rl, Lines: 76, Source: qnetwork.py

Example 7: __init__


#......... part of the code omitted here .........
            self.q_layers.append(
                layers.DenseLayer(self.q_layers[-1],
                                  n_outputs=num_actions,
                                  weights_std=0.01,
                                  init_bias_value=0.1,
                                  dropout=0,
                                  nonlinearity=layers.identity))


        if approximator == 'none':
            self.q_layers.append(\
                layers.DenseLayerNoBias(self.q_layers[-1],
                                        n_outputs=num_actions,
                                        weights_std=0.00,
                                        dropout=0,
                                        nonlinearity=layers.identity))


        self.q_layers.append(layers.OutputLayer(self.q_layers[-1]))

        for i in range(len(self.q_layers)-1):
            print(self.q_layers[i].get_output_shape())


        # Now create a network (using the same weights)
        # for next state q values
        self.next_layers = copy_layers(self.q_layers)
        self.next_layers[0] = layers.Input2DLayer(self._batch_size,
                                                  self._num_input_features,
                                                  self._img_width,
                                                  self._img_height,
                                                  self.scale_input_by)
        self.next_layers[1].input_layer = self.next_layers[0]

        self.rewards = T.col()
        self.actions = T.icol()

        # Build the loss function ...
        q_vals = self.q_layers[-1].predictions()
        next_q_vals = self.next_layers[-1].predictions()
        next_maxes = T.max(next_q_vals, axis=1, keepdims=True)
        target = self.rewards + discount * next_maxes
        target = theano.gradient.consider_constant(target)
        diff = target - q_vals
        # Zero out all entries for actions that were not chosen...
        mask = build_mask(T.zeros_like(diff), self.actions, 1.0)
        diff_masked = diff * mask
        error = T.mean(diff_masked ** 2)
        self._loss = error * diff_masked.shape[1] #

        self._parameters = layers.all_parameters(self.q_layers[-1])

        self._idx = T.lscalar('idx')

        # CREATE VARIABLES FOR INPUT AND OUTPUT
        self.states_shared = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.states_shared_next = theano.shared(
            np.zeros((1, 1, 1, 1), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(
            np.zeros((1, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))
        self.actions_shared = theano.shared(
            np.zeros((1, 1), dtype='int32'), broadcastable=(False, True))

        self._givens = \
            {self.q_layers[0].input_var:
             self.states_shared[self._idx*self._batch_size:
                                (self._idx+1)*self._batch_size, :, :, :],
             self.next_layers[0].input_var:
             self.states_shared_next[self._idx*self._batch_size:
                                     (self._idx+1)*self._batch_size, :, :, :],

             self.rewards:
             self.rewards_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :],
             self.actions:
             self.actions_shared[self._idx*self._batch_size:
                                 (self._idx+1)*self._batch_size, :]
             }

        if self.momentum != 0:
            self._updates = layers.gen_updates_rmsprop_and_nesterov_momentum(\
                self._loss, self._parameters, learning_rate=self.learning_rate,
                rho=self.decay, momentum=self.momentum, epsilon=1e-6)
        else:
            self._updates = layers.gen_updates_rmsprop(self._loss,
                self._parameters, learning_rate=self.learning_rate,
                rho=self.decay, epsilon=1e-6)

        self._train = theano.function([self._idx], self._loss,
                                      givens=self._givens,
                                      updates=self._updates)
        self._compute_loss = theano.function([self._idx],
                                             self._loss,
                                             givens=self._givens)
        self._compute_q_vals = \
            theano.function([self.q_layers[0].input_var],
                            self.q_layers[-1].predictions(),
                            on_unused_input='ignore')
Developer: akansal1, Project: einstein, Lines: 101, Source: cnn_q_learner.py

Example 8: test_git_on_gip

def test_git_on_gip(hyper_params=None, rng_seed=1234):
    assert(not (hyper_params is None))
    # Initialize a source of randomness
    rng = np.random.RandomState(rng_seed)

    sup_count = 100
    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm_ss(dataset, sup_count, rng, zero_mean=False)
    Xtr_su = datasets[0][0].get_value(borrow=False)
    Ytr_su = datasets[0][1].get_value(borrow=False).astype(np.int32)
    Xtr_un = datasets[1][0].get_value(borrow=False)
    Ytr_un = datasets[1][1].get_value(borrow=False).astype(np.int32)
    # get the joint labeled and unlabeled data
    Xtr_un = np.vstack([Xtr_su, Xtr_un]).astype(theano.config.floatX)
    Ytr_un = np.vstack([Ytr_su[:,np.newaxis], Ytr_un[:,np.newaxis]])
    # get the labeled data
    Xtr_su = Xtr_su.astype(theano.config.floatX)
    Ytr_su = Ytr_su[:,np.newaxis]
    # get observations and labels for the validation set
    Xva = datasets[2][0].get_value(borrow=False).astype(theano.config.floatX)
    Yva = datasets[2][1].get_value(borrow=False).astype(np.int32)
    Yva = Yva[:,np.newaxis] # numpy is dumb
    # get size information for the data
    un_samples = Xtr_un.shape[0]
    su_samples = Xtr_su.shape[0]
    va_samples = Xva.shape[0]

    # set up some symbolic variables for input/output
    Xp = T.matrix('Xp_base')
    Xd = T.matrix('Xd_base')
    Xc = T.matrix('Xc_base')
    Xm = T.matrix('Xm_base')
    Yd = T.icol('Yd_base')

    # set some "shape" parameters for the networks
    data_dim = Xtr_un.shape[1]
    label_dim = 10
    prior_1_dim = 50
    prior_2_dim = 50
    prior_sigma = 1.0
    batch_size = 100

    ##################
    # SETUP A GIPAIR #
    ##################
    gn1_params = {}
    gn1_config = [prior_1_dim, 600, 600, data_dim]
    gn1_params['mlp_config'] = gn1_config
    gn1_params['activation'] = softplus_actfun
    gn1_params['out_type'] = 'bernoulli'
    gn1_params['lam_l2a'] = 1e-3
    gn1_params['vis_drop'] = 0.0
    gn1_params['hid_drop'] = 0.0
    gn1_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in1_params = {}
    shared_config = [data_dim, 600, 600]
    top_config = [shared_config[-1], prior_1_dim]
    in1_params['shared_config'] = shared_config
    in1_params['mu_config'] = top_config
    in1_params['sigma_config'] = top_config
    in1_params['activation'] = softplus_actfun
    in1_params['lam_l2a'] = 1e-3
    in1_params['vis_drop'] = 0.0
    in1_params['hid_drop'] = 0.0
    in1_params['bias_noise'] = 0.1
    in1_params['input_noise'] = 0.0
    # Initialize the base networks for this GIPair
    IN1 = InfNet(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, prior_sigma=prior_sigma, \
            params=in1_params, shared_param_dicts=None)
    GN1 = GenNet(rng=rng, Xp=Xp, prior_sigma=prior_sigma, \
            params=gn1_params, shared_param_dicts=None)
    # Initialize biases in IN and GN
    IN1.init_biases(0.0)
    GN1.init_biases(0.0)
    # Initialize the GIPair
    GIP = GIPair(rng=rng, Xd=Xd, Xc=Xc, Xm=Xm, g_net=GN1, i_net=IN1, \
            data_dim=data_dim, prior_dim=prior_1_dim, \
            params=None, shared_param_dicts=None)
    # Set cost weighting parameters
    GIP.set_lam_nll(1.0)
    GIP.set_lam_kld(1.0)
    GIP.set_lam_l2w(1e-4)

    ##################
    # SETUP A GITRIP #
    ##################
    # set parameters for the generator network
    gn2_params = {}
    gn2_config = [(prior_2_dim + label_dim), 300, prior_1_dim]
    gn2_params['mlp_config'] = gn2_config
    gn2_params['activation'] = softplus_actfun
    gn2_params['out_type'] = 'gaussian'
    gn2_params['lam_l2a'] = 1e-3
    gn2_params['vis_drop'] = 0.0
    gn2_params['hid_drop'] = 0.0
    gn2_params['bias_noise'] = 0.1
    # choose some parameters for the continuous inferencer
    in2_params = {}
#......... part of the code omitted here .........
Developer: darcy0511, Project: NN-Python, Lines: 101, Source: MnistTests.py

Example 9: __init__

    def __init__(self, environment, rho, rms_epsilon, momentum, clip_delta, freeze_interval, batchSize, network_type, 
                 update_rule, batch_accumulator, randomState, frame_scale=255.0):
        """ Initialize environment

        Arguments:
            environment - the environment (class Env) 
            num_elements_in_batch - list of k integers for the number of each element kept as belief state
            num_actions - int
            discount - float
            learning_rate - float
            rho, rms_epsilon, momentum - float, float, float
            ...
            network_type - string 
            ...           
        """

        self._environment = environment
        
        self._batchSize = batchSize
        self._inputDimensions = self._environment.inputDimensions()
        self._nActions = self._environment.nActions()
        self._df = 0
        self.rho = rho
        self._lr = 0
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self._randomState = randomState
        
        lasagne.random.set_rng(self._randomState)

        self.update_counter = 0
        
        states=[]   # list of symbolic variables for each of the k element in the belief state
                    # --> [ T.tensor4 if observation of element=matrix, T.tensor3 if vector, T.tensor 2 if scalar ]
        next_states=[] # idem than states at t+1 
        self.states_shared=[] # list of shared variable for each of the k element in the belief state
        self.next_states_shared=[] # idem that self.states_shared at t+1

        for i, dim in enumerate(self._inputDimensions):
            if len(dim) == 3:
                states.append(T.tensor4("%s_%s" % ("state", i)))
                next_states.append(T.tensor4("%s_%s" % ("next_state", i)))

            elif len(dim) == 2:
                states.append(T.tensor3("%s_%s" % ("state", i)))
                next_states.append(T.tensor3("%s_%s" % ("next_state", i)))
                
            elif len(dim) == 1:            
                states.append( T.matrix("%s_%s" % ("state", i)) )
                next_states.append( T.matrix("%s_%s" % ("next_state", i)) )
                
            self.states_shared.append(theano.shared(np.zeros((batchSize,) + dim, dtype=theano.config.floatX) , borrow=False))
            self.next_states_shared.append(theano.shared(np.zeros((batchSize,) + dim, dtype=theano.config.floatX) , borrow=False))
        
        print("Number of observations per state: {}".format(len(self.states_shared)))
        print("For each observation, historySize + ponctualObs_i.shape: {}".format(self._inputDimensions))
                
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')
        thediscount = T.scalar(name='thediscount', dtype=theano.config.floatX)
        thelr = T.scalar(name='thelr', dtype=theano.config.floatX)
        
        self.l_out, self.l_outs_conv, shape_after_conv = self._build(network_type, states)
        
        print("Number of neurons after spatial and temporal convolution layers: {}".format(shape_after_conv))

        self.next_l_out, self.next_l_outs_conv, shape_after_conv = self._build(network_type, next_states)
        self._resetQHat()

        self.rewards_shared = theano.shared(
            np.zeros((batchSize, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((batchSize, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            np.zeros((batchSize, 1), dtype='int32'),
            broadcastable=(False, True))


        q_vals = lasagne.layers.get_output(self.l_out)        
        
        next_q_vals = lasagne.layers.get_output(self.next_l_out)
        
        max_next_q_vals=T.max(next_q_vals, axis=1, keepdims=True)
        
        T_ones_like=T.ones_like(T.ones_like(terminals) - terminals)
        
        target = rewards + T_ones_like * thediscount * max_next_q_vals

        q_val=q_vals[T.arange(batchSize), actions.reshape((-1,))].reshape((-1, 1))

        diff = target - q_val

        if self.clip_delta > 0:
#......... part of the code omitted here .........
Developer: Gzzgz, Project: General_Deep_Q_RL, Lines: 101, Source: q_net_lasagne.py

Example 10: setup

    def setup(self):
        lasagne.random.set_rng(self.rng)
        
        self.update_counter = 0

        self.l_out = self.build_q_network()              
               
        states = T.tensor3('states')
        next_states = T.tensor3('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')
        
        # Shared variables for training from a minibatch of replayed
        # state transitions, each consisting of an observation,
        # along with the chosen action and resulting
        # reward and terminal status.
        self.states_shared = theano.shared(
            np.zeros((self.batch_size, self.input_height, self.input_width), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))
        self.actions_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype='int32'),
            broadcastable=(False, True))
        self.terminals_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype='int32'),
            broadcastable=(False, True))
        
        # Shared variable for a single state, to calculate q_vals.
        self.state_shared = theano.shared(
            np.zeros((self.input_height, self.input_width), dtype=theano.config.floatX))

        # Formulas
        q_vals = lasagne.layers.get_output(self.l_out, states / self.input_scale)
        
        next_q_vals = lasagne.layers.get_output(self.l_out, next_states / self.input_scale)
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
        
        terminalsX = terminals.astype(theano.config.floatX)
        action_mask = T.eq(T.arange(self.num_actions).reshape((1, -1)),
                          actions.reshape((-1, 1))).astype(theano.config.floatX)

        target = (rewards +
                  (T.ones_like(terminalsX) - terminalsX) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        output = (q_vals * action_mask).sum(axis=1).reshape((-1, 1))
        diff = target - output

        loss = 0.5 * diff ** 2
        loss = T.sum(loss)
        #loss = T.mean(loss)

        # Params and givens            
        params = lasagne.layers.helper.get_all_params(self.l_out)  
        updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho, self.rms_epsilon)
        train_givens = {
            states: self.states_shared[:, :-1],
            next_states: self.imgs_shared[:, 1:],
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        self._train = theano.function([], [loss], updates=updates,
                                      givens=train_givens)
        q_givens = {
            states: self.state_shared.reshape((1,
                                               self.input_height,
                                               self.input_width))
        }
        self._q_vals = theano.function([], q_vals[0], givens=q_givens)
开发者ID:mortennp,项目名称:misc,代码行数:71,代码来源:deep_q_agent_lasagne.py

示例11: __init__

    def __init__(self, input_width, input_height, avail_actions, num_actions,
                 num_frames, discount, learning_rate, rho,
                 rms_epsilon, momentum, clip_delta, freeze_interval,
                 batch_size, network_type, update_rule,
                 batch_accumulator, rng, train_all, input_scale=255.0):

        self.input_width = input_width
        self.input_height = input_height
        self.avail_actions = avail_actions
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self.rng = rng
        self.train_all = train_all

        lasagne.random.set_rng(self.rng)

        self.update_counter = 0

        print "num_actions: " + str(num_actions)
        self.l_out = self.build_network(network_type, input_width, input_height,
                                        num_actions, num_frames, batch_size)
        if self.freeze_interval > 0:
            self.next_l_out = self.build_network(network_type, input_width,
                                                 input_height, num_actions,
                                                 num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)

        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                    next_states / input_scale)
        else:
            next_q_vals = lasagne.layers.get_output(self.l_out,
                                                    next_states / input_scale)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        target = (rewards +
                  (T.ones_like(terminals) - terminals) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        diff = target - q_vals[T.arange(batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

        if self.clip_delta > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            #
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
        else:
            loss = 0.5 * diff ** 2

        if batch_accumulator == 'sum':
            loss = T.sum(loss)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

#......... part of the code omitted here .........
Developer: cowhi, Project: deep_q_rl, Lines: 101, Source: q_network.py

Example 12: __init__

    def __init__(self, rng=None, Xd=None, \
            g_net=None, i_net=None, pn_seq=None, \
            data_dim=None, prior_dim=None, \
            params=None):
        # setup a rng for this AEDPair
        self.rng = RandStream(rng.randint(100000))

        if (params is None):
            self.params = {}
        else:
            self.params = params
        if 'match_type' in params:
            self.match_type = params['match_type']
        else:
            self.match_type = 'grad_sign'
        # we can only try to match sign or direction...
        assert((self.match_type == 'grad_dir') or \
                (self.match_type == 'grad_sign'))
        if self.match_type == 'grad_dir':
            # we match the direction of the gradient under the assumption
            # of gaussian observation noise
            self.mean_transform = lambda x: max_normalize(x, axis=1)
            assert(g_net.out_type == 'gaussian')
        else:
            # we match the sign of the gradient as if it were a collection
            # of independent binary variables
            self.mean_transform = lambda x: 2.0 * (x - 0.5)
            assert(g_net.out_type == 'bernoulli')

        # record the symbolic variables that will provide inputs to the
        # computation graph created to describe this AEDPair
        self.Xd = Xd
        self.Yd = T.icol('adp_Yd') # labels to pass to the PeaNetSeq
        self.Xc = 0.0 * self.Xd
        self.Xm = 0.0 * self.Xd
        self.obs_count = T.cast(Xd.shape[0], 'floatX')

        # create a "shared-parameter" clone of the inferencer, set up to
        # receive input from the appropriate symbolic variables.
        self.IN = i_net.shared_param_clone(rng=rng, \
                Xd=self.Xd, Xc=self.Xc, Xm=self.Xm)
        self.policy_mean = self.IN.output_mean
        self.policy_logvar = self.IN.output_logvar
        # capture a handle for samples from the variational posterior
        self.Xp = self.IN.output
        # create a "shared-parameter" clone of the generator, set up to
        # receive input from samples from the variational posterior
        self.GN = g_net.shared_param_clone(rng=rng, Xp=self.IN.output)
        # set up a var for controlling the max-norm bound on perturbations
        zero_ary = np.zeros((1,)).astype(theano.config.floatX)
        self.lam_mnb = theano.shared(value=zero_ary, \
                name='adp_lam_mnb')
        self.set_lam_mnb(lam_mnb=0.1)

        # get the perturbations output by the generator network
        self.Pg = self.mean_transform(self.GN.output)
        if self.match_type == 'grad_dir':
            # samples because we're matching gradient via squared error
            self.Pg_samples = self.mean_transform(self.GN.output_samples)
        else:
            # no samples, because we're matching gradient sign
            self.Pg_samples = self.mean_transform(self.GN.output)

        # record and validate the data dimensionality parameters
        self.data_dim = data_dim
        self.prior_dim = prior_dim
        # output of the generator and input to the inferencer should both be
        # equal to self.data_dim
        assert(self.data_dim == self.GN.mlp_layers[-1].out_dim)
        assert(self.data_dim == self.IN.shared_layers[0].in_dim)
        # input of the generator and mu/sigma outputs of the inferencer should
        # both be equal to self.prior_dim
        assert(self.prior_dim == self.GN.mlp_layers[0].in_dim)
        assert(self.prior_dim == self.IN.mu_layers[-1].out_dim)
        assert(self.prior_dim == self.IN.sigma_layers[-1].out_dim)

        # make a clone of the target PeaNetSeq that takes perturbed inputs
        self.PNS = pn_seq.shared_param_clone(rng=rng, seq_len=2, \
                seq_Xd=[self.Xd, self.Xd], seq_Yd=[self.Yd, self.Yd], \
                no_funcs=True)
        self.grad_pea_Xd = T.grad(self.PNS.joint_cost, self.Xd)
        if self.match_type == 'grad_dir':
            # turn gradient into a unit max-normalized vector
            self.match_target = max_normalize(self.grad_pea_Xd)
        else:
            # transform gradient into binary indicators of sign
            self.match_target = (self.grad_pea_Xd > 0.0)
        # get the symbolic vars for passing inputs to self.PNS
        self.Xd_seq = self.PNS.Xd_seq
        self.Yd_seq = self.PNS.Yd_seq
        self.seq_inputs = self.Xd_seq + self.Yd_seq

        # shared var learning rate for generator and inferencer
        self.lr_gn = theano.shared(value=zero_ary, name='adp_lr_gn')
        self.lr_in = theano.shared(value=zero_ary, name='adp_lr_in')
        # shared var momentum parameters for generator and inferencer
        self.mom_1 = theano.shared(value=zero_ary, name='adp_mom_1')
        self.mom_2 = theano.shared(value=zero_ary, name='adp_mom_2')
        self.it_count = theano.shared(value=zero_ary, name='adp_it_count')
        # init parameters for controlling learning dynamics
#......... part of the code omitted here .........
Developer: Philip-Bachman, Project: NN-Python, Lines: 101, Source: AEDPair.py

Example 13: __init__

    def __init__(self, num_actions):
        
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = BATCH_SIZE
        self.discount_rate = DISCOUNT_RATE
        self.history_length = HISTORY_LENGTH
        self.screen_dim = DIMS
        self.img_height = SCREEN_HEIGHT
        self.img_width = SCREEN_WIDTH
        self.clip_error = CLIP_ERROR
        self.input_color_scale = COLOR_SCALE

        self.target_steps = TARGET_STEPS
        self.train_iterations = TRAIN_STEPS
        self.train_counter = 0
        self.momentum = MOMENTUM
        self.update_rule = UPDATE_RULE
        self.learning_rate = LEARNING_RATE
        self.rms_decay = RMS_DECAY
        self.rms_epsilon = RMS_EPSILON        
        
        self.rng = np.random.RandomState(RANDOM_SEED)

        # set seed
        lasagne.random.set_rng(self.rng)

        # prepare tensors once and reuse them
        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        # terminals are bool for our case
        terminals = T.bcol('terminals')

        # create shared theano variables
        self.states_shared = theano.shared(
            np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                     dtype=theano.config.floatX))

        # !broadcast ?
        self.rewards_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            #np.zeros((self.batch_size, 1), dtype='int32'),
            np.zeros((self.batch_size, 1), dtype='int8'),
            broadcastable=(False, True))

        # can add multiple nets here
        self.l_primary = self.build_network()

        if self.target_steps > 0:
            self.l_secondary = self.build_network()
            self.copy_to_secondary()

        
        """
        # input scale i.e. division can be applied to input directly also to normalize
        """

        # define output symbols
        q_vals = lasagne.layers.get_output(self.l_primary, states / self.input_color_scale)
        
        if self.target_steps > 0:
            q_vals_secondary = lasagne.layers.get_output(self.l_secondary, next_states / self.input_color_scale)
        else:
            # why this ?
            q_vals_secondary = lasagne.layers.get_output(self.l_primary, next_states / self.input_color_scale)
            q_vals_secondary = theano.gradient.disconnected_grad(q_vals_secondary)

        # target = r + max
        target = (rewards + (T.ones_like(terminals) - terminals) * self.discount_rate * T.max(q_vals_secondary, axis=1, keepdims=True))
        
        """
        # check what this does
        """
        diff = target - q_vals[T.arange(self.batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

        # print shape ? 

        if self.clip_error > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            # 
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
#......... part of the code omitted here .........
Developer: hercky, Project: a3c, Lines: 101, Source: network.py

Example 14: __init__

    def __init__(self, input_width, input_height, num_actions,
                 num_frames, discount, learning_rate,momentum,
                 batch_size, ):

        self.input_width = input_width
        self.input_height = input_height
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.lr = learning_rate
        self.momentum = momentum

        self.update_counter = 0

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        from nn import network
        n, layers = network(n_channels=num_frames,
                            img_size=input_width, n_actions=num_actions)
        self.n = n
        q_vals = n.output(data_layer=states)
        next_q_vals = n.output(data_layer=next_states)
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
        next_q_vals = T.minimum(0, next_q_vals)

        layers_samples = [l.output(data_layer=states) for l in layers]
        layers_batchstd = [T.mean(T.std(s, axis=0)) for s in layers_samples]
        w, b = n.weight(), n.bias()
        params = w + b

        target = (rewards +
                  (T.ones_like(terminals) - terminals) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        diff = target - q_vals[T.arange(batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

        loss = T.mean(diff ** 2)

        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }
        updates = lasagne.updates.rmsprop(loss, params, self.lr)

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None,
                                                     self.momentum)

        self._train = theano.function([], [loss, q_vals], updates=updates,
                                      givens=givens)
        self._batchstd = theano.function([], layers_batchstd,
                                         givens={states: self.states_shared})
        self._sample = theano.function([], layers_samples,
                                       givens={states: self.states_shared})
        self._q_vals = theano.function([states], q_vals,)
开发者ID:rbn42,项目名称:ChessBox,代码行数:83,代码来源:qlearner.py

Example 15: conv_layer

    l_in = lasagne.layers.InputLayer(
        shape=(None, num_frames, input_width, input_height)
    )

    l_conv = conv_layer(
        l_in,
        num_filters=16,
        filter_size=(8,8),
        stride=(4,4),
    )
    return l_conv

l_out = build_network()

rewards = T.col('rewards')
actions = T.icol('actions')
terminals = T.icol('terminals')

rewards_shared = theano.shared(
    np.zeros((batch_size, 1), dtype=theano.config.floatX),
    broadcastable=(False, True), name='rewards')

actions_shared = theano.shared(
    np.zeros((batch_size, 1), dtype='int32'),
    broadcastable=(False, True), name='actions')

givens = {
    rewards: rewards_shared,
    actions: actions_shared,
}
开发者ID:rubenvereecken,项目名称:deep_q_rl,代码行数:30,代码来源:test.py


Note: The theano.tensor.icol function examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. Please refer to the license of the corresponding project before distributing or using the code, and do not reproduce this article without permission.