

Python Model.bprop Method Code Examples

This article collects typical usage examples of the Model.bprop method from Python's neon.models. If you are wondering how exactly to use Python Model.bprop, how to call it, or what code that uses it looks like, the curated method examples here may help. You can also explore further usage examples of the class it belongs to, neon.models.Model.


Below are 12 code examples of the Model.bprop method, sorted by popularity by default.
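
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: run fprop to get the network output, obtain the error signal from the cost with get_errors, back-propagate it with Model.bprop, and apply the weight updates with the optimizer. The backend choice, layer sizes, and random data below are illustrative assumptions, not taken from any single example.

from neon.backends import gen_backend
from neon.initializers import Gaussian
from neon.layers import Affine, GeneralizedCost
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Rectlin, Logistic, CrossEntropyBinary

# illustrative sizes (assumptions, not drawn from the examples below)
nin, nout = 784, 10
be = gen_backend(backend='cpu', batch_size=32)

init_norm = Gaussian(loc=0.0, scale=0.01)
layers = [Affine(nout=64, init=init_norm, activation=Rectlin()),
          Affine(nout=nout, init=init_norm, activation=Logistic(shortcut=True))]
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
model.initialize(nin, cost)

optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

x = be.array(be.rng.rand(nin, be.bsz))    # one minibatch of inputs
t = be.array(be.rng.rand(nout, be.bsz))   # one minibatch of targets

out = model.fprop(x)                       # forward pass
delta = cost.get_errors(out, t)            # gradient of the cost w.r.t. the output
model.bprop(delta)                         # back-propagate the error signal
optimizer.optimize(model.layers_to_optimize, epoch=0)  # apply the parameter updates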

Example 1: test_conv_rnn

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = NervanaObject.be
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(),
                        depth=1, reset_cells=True)
    birnn_1 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=False)
    birnn_2 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=2, reset_cells=True, batch_norm=False)
    bibnrnn = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=True)
    birnnsum = DeepBiRNN(128, init_norm, activation=Rectlin(),
                         depth=1, reset_cells=True, batch_norm=False, bi_sum=True)
    rnn = Recurrent(128, init=init_norm, activation=Rectlin(), reset_cells=True)
    lstm = LSTM(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(), reset_cells=True)
    gru = GRU(128, init_norm, activation=Rectlin(), gate_activation=Rectlin(), reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
                    Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                         strides=dict(str_h=2, str_w=4)),
                    Pooling(2, strides=2),
                    Conv((3, 3, 4), init=init_norm, batch_norm=True, activation=Rectlin(),
                         strides=dict(str_h=1, str_w=2)),
                    rl,
                    RecurrentMean(),
                    Affine(nout=10, init=init_norm, activation=Rectlin()),
                ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
Author: StevenLOL, Project: neon, Lines: 42, Source: test_model.py

Example 2: test_reshape_layer_model

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
def test_reshape_layer_model(backend_default, fargs):
    """
    test cases:
    - conv before RNNs
    - conv after RNNs
    - conv after LUT
    """
    np.random.seed(seed=0)

    nin, nout, bsz = fargs
    be = backend_default
    be.bsz = bsz
    input_size = (nin, be.bsz)

    init = Uniform(-0.1, 0.1)
    g_uni = GlorotUniform()

    inp_np = np.random.rand(nin, be.bsz)
    delta_np = np.random.rand(nout, be.bsz)

    inp = be.array(inp_np)
    delta = be.array(delta_np)

    conv_lut_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 100, -1)),
        Conv((3, 3, 16), init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        RecurrentSum(),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_lut_2 = [
        LookupTable(vocab_size=1000, embedding_dim=400, init=init),
        Reshape(reshape=(4, 50, -1)),
        Conv((3, 3, 16), init=init),
        Pooling(2, strides=2),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    conv_rnn_1 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        LSTM(64, g_uni, activation=Tanh(),
             gate_activation=Logistic(), reset_cells=True),
        Reshape(reshape=(4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    conv_rnn_2 = [
        LookupTable(vocab_size=2000, embedding_dim=400, init=init),
        Recurrent(64, g_uni, activation=Tanh(), reset_cells=True),
        Reshape(reshape=(4, -1, 32)),
        Conv((3, 3, 16), init=init),
        Affine(nout, init, bias=init, activation=Softmax())
    ]

    lut_sum_1 = [
        LookupTable(vocab_size=1000, embedding_dim=128, init=init),
        RecurrentSum(),
        Affine(nout=nout, init=init, bias=init, activation=Softmax()),
    ]

    lut_birnn_1 = [
        LookupTable(vocab_size=1000, embedding_dim=200, init=init),
        DeepBiRNN(32, init=GlorotUniform(), batch_norm=True, activation=Tanh(),
                  reset_cells=True, depth=1),
        Reshape((4, 32, -1)),
        Conv((3, 3, 16), init=init),
        Affine(nout=nout, init=init, bias=init, activation=Softmax())
    ]

    layers_test = [conv_lut_1, conv_lut_2, conv_rnn_1, conv_rnn_2, lut_sum_1, lut_birnn_1]

    for lg in layers_test:
        model = Model(layers=lg)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(input_size, cost)
        model.fprop(inp)
        model.bprop(delta)
Author: Jokeren, Project: neon, Lines: 83, Source: test_reshape_layer.py

Example 3: test_model_serialize

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
def test_model_serialize(backend_default, data):
    dataset = MNIST(path=data)
    (X_train, y_train), (X_test, y_test), nclass = dataset.load_data()
    train_set = ArrayIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)

    mlp.initialize(train_set)
    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert allclose_with_out(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert allclose_with_out(_s, _s_e)
            else:
                assert allclose_with_out(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert allclose_with_out(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert allclose_with_out(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
Author: StevenLOL, Project: neon, Lines: 82, Source: test_model.py

Example 4: zip

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
    im.set(im2)
    l.set(l2)

# run fprop and bprop on this minibatch and save the results
out_fprop = model.fprop(im)
out_fprop_save = [x.get() for x in out_fprop]
im.set(im_save)
out_fprop = model.fprop(im)
out_fprop_save2 = [x.get() for x in out_fprop]
for x, y in zip(out_fprop_save, out_fprop_save2):
    assert np.max(np.abs(x-y)) == 0.0, '2 fprop iterations do not match'

# run fit for 1 minibatch
# have to do this by hand
delta = model.cost.get_errors(im, l)
model.bprop(delta)
if args.resume:
    model.optimizer = opt
model.optimizer.optimize(model.layers_to_optimize, epoch=model.epoch_index)

# run fprop again as a measure of the model state
out_fprop = model.fprop(im)
out_fprop_save2 = [x.get() for x in out_fprop]

if not args.resume:
    with open('serial_test_out1.pkl', 'w') as fid:
        pickle.dump([out_fprop_save, out_fprop_save2], fid)
else:
    # load up the saved file and compare
    with open('serial_test_out1.pkl', 'r') as fid:
        run1 = pickle.load(fid)
Author: Jicheng-Yan, Project: neon, Lines: 33, Source: inception.py

Example 5: __init__

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
  def _setInput(self, states):
    # change order of axes to match what Neon expects
    states = np.transpose(states, axes = (1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # have to serialize also states for batch normalization to work
      pdict = self.model.get_description(get_weights=True, keep_states=True)
      self.target_model.deserialize(pdict, load_states=True)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # It seems necessary for the CPU backend.
    targets = preq.asnumpyarray().copy()

    # clip rewards between -1 and 1
    rewards = np.clip(rewards, self.min_reward, self.max_reward)

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost[0,0])

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let you change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_params(load_path)

  def save_weights(self, save_path):
    self.model.save_params(save_path)
Author: loofahcus, Project: simple_dqn, Lines: 104, Source: deepqnetwork.py

Example 6:

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
while flag:
    for (x, t) in data:
        iter += 1
        if iter > num_iterations:
            flag = False
            break
        if iter > config.num_warmup_iters:  # time it
            if config.backend == 'cpu':
                s = time.time()*1000
                x = network.fprop(x)
                cost_iter = network.cost.get_cost(x, t)
                e = time.time()*1000  # in milliseconds
                forward_time[iter - config.num_warmup_iters - 1] = e - s
                s = time.time()*1000
                delta = network.cost.get_errors(x, t)  # gradient of the cost
                network.bprop(delta)
                e = time.time()*1000
                backward_time[iter - config.num_warmup_iters - 1] = e - s
            else:
                start.record()
                x = network.fprop(x)
                cost_iter = network.cost.get_cost(x, t)
                end.record()
                end.synchronize()
                forward_time[iter - config.num_warmup_iters - 1] \
                    = end.time_since(start)
                start.record()
                delta = network.cost.get_errors(x, t)
                network.bprop(delta)
                end.record()
                end.synchronize()
Author: DL-Benchmarks, Project: DL-Benchmarks, Lines: 33, Source: lenet.py

Example 7: __init__

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
      # HACK: serialize network to disk and read it back to clone
      filename = os.path.join(self.save_weights_path, "target_network.pkl")
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self.setTensor(poststates)
    postq = self.target_model.fprop(self.tensor, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self.setTensor(prestates)
    preq = self.model.fprop(self.tensor, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      if terminals[i]:
        targets[action, i] = float(rewards[i])
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
      self.callback.on_train(cost.asnumpyarray()[0,0])

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let you change the minibatch size
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # find the action with highest q-value
    actions = self.be.argmax(qvalues, axis = 0)
    assert actions.shape == (1, self.batch_size)

    # take only the first result
    return actions.asnumpyarray()[0,0]

  def getMeanQ(self, states):
    assert states.shape == ((self.batch_size, self.history_length,) + self.screen_dim)

    # calculate Q-values for the states
    self.setTensor(states)
    qvalues = self.model.fprop(self.tensor, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    
    # take maximum Q-value for each state
    actions = self.be.max(qvalues, axis = 0)
    assert actions.astensor().shape == (1, self.batch_size)
    
    # calculate mean Q-value of all states
    meanq = self.be.mean(actions, axis = 1)
    assert meanq.astensor().shape == (1, 1)

    # return the mean
    return meanq.asnumpyarray()[0,0]

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Author: rockhowse, Project: simple_dqn, Lines: 104, Source: deepqnetwork.py

Example 8: __init__

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    postq = postq.asnumpyarray()
    maxsteerq = np.max(postq[:self.num_steers,:], axis=0)
    assert maxsteerq.shape == (self.batch_size,), "size: %s" % str(maxsteerq.shape)
    maxspeedq = np.max(postq[-self.num_speeds:,:], axis=0)
    assert maxspeedq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    # HACK: copy() was needed to make it work on CPU
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, (steer, speed) in enumerate(zip(steers, speeds)):
      if terminals[i]:
        targets[steer, i] = float(rewards[i])
        targets[self.num_steers + speed, i] = float(rewards[i])
      else:
        targets[steer, i] = float(rewards[i]) + self.discount_rate * maxsteerq[i]
        targets[self.num_steers + speed, i] = float(rewards[i]) + self.discount_rate * maxspeedq[i]

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 2 * self.batch_size, str(np.count_nonzero(deltas.asnumpyarray()))

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)
    #print "cost:", cost.asnumpyarray()

    # clip errors
    if self.clip_error:
      self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    '''
    if np.any(rewards < 0):
        preqq = preq.asnumpyarray().copy()
        self._setInput(prestates)
        qvalues = self.model.fprop(self.input, inference = True).asnumpyarray().copy()
        indexes = rewards < 0
        print "indexes:", indexes
        print "preq:", preqq[:, indexes].T
        print "preq':", qvalues[:, indexes].T
        print "diff:", (qvalues[:, indexes]-preqq[:, indexes]).T
        print "steers:", steers[indexes]
        print "speeds:", speeds[indexes]
        print "rewards:", rewards[indexes]
        print "terminals:", terminals[indexes]
        print "preq[0]:", preqq[:, 0]
        print "preq[0]':", qvalues[:, 0]
        print "diff:", qvalues[:, 0] - preqq[:, 0]
        print "deltas:", deltas.asnumpyarray()[:, indexes].T
        raw_input("Press Enter to continue...")
    '''

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let you change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Author: tambetm, Project: botmobile, Lines: 104, Source: deepqnetwork.py

Example 9: __init__

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
    states = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(states.copy())
    # normalize network input between 0 and 1
    # self.be.divide(self.input, 255, self.input)

  def train(self, minibatch, epoch):
    # expand components of minibatch
    prestates, actions, speed_actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 2
    assert len(poststates.shape) == 2
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
    #print "WE ARE ACTUALLY TRAINING IN HERE"
    if self.target_steps and self.train_iterations % self.target_steps == 0:
      # HACK: serialize network to disk and read it back to clone
      filename = self.save_weights_prefix + "_target.pkl"
      save_obj(self.model.serialize(keep_states = False), filename)
      self.target_model.load_weights(filename)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference = True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    postq = postq.asnumpyarray()
    maxpostq = np.max(postq, axis=0)
    #print maxpostq.shape
    assert maxpostq.shape == (self.batch_size,)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference = False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray().copy()

    # update Q-value targets for actions taken
    for i, action in enumerate(actions):
      self.action_count[action] += 1
      if terminals[i]:
        targets[action, i] = float(rewards[i])
        if rewards[i] == -1000:
            print "######################### action ", action, "should never be sampled again"
        print "sampled_terminal"
      else:
        targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[i]
        #targets[i,action] = float(rewards[i]) + self.discount_rate * maxpostq[i]
    #print "action count", self.action_count
    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32
    print "nonzero deltas", np.count_nonzero(deltas.asnumpyarray())

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1,1)
    print "cost:", cost.asnumpyarray()

    # clip errors
    #if self.clip_error:
    #  self.be.clip(deltas, -self.clip_error, self.clip_error, out = deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

  def predict(self, states):
    # minibatch is full size, because Neon doesn't let you change the minibatch size
    assert states.shape == (self.batch_size, self.state_size)

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference = True)
    assert qvalues.shape == (self.num_actions, self.batch_size)
    if logger.isEnabledFor(logging.DEBUG):
      logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()

  def load_weights(self, load_path):
    self.model.load_weights(load_path)

  def save_weights(self, save_path):
    save_obj(self.model.serialize(keep_states = True), save_path)
Author: tambetm, Project: botmobile, Lines: 104, Source: deepqnetwork_steer.py

Example 10: DQNNeon

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
        assert len(poststates.shape) == 4
        assert len(actions.shape) == 1
        assert len(rewards.shape) == 1
        assert len(terminals.shape) == 1
        assert prestates.shape == poststates.shape
        assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
        # feed-forward pass for poststates to get Q-values
        self._prepare_network_input(poststates)
        postq = self.target_model.fprop(self.input, inference = True)
        assert postq.shape == (self.output_shape, self.batch_size)
        # calculate max Q-value for each poststate
        maxpostq = self.be.max(postq, axis=0).asnumpyarray()
        assert maxpostq.shape == (1, self.batch_size)
        # average maxpostq for stats
        maxpostq_avg = maxpostq.mean()
        # feed-forward pass for prestates
        self._prepare_network_input(prestates)
        preq = self.model.fprop(self.input, inference = False)
        assert preq.shape == (self.output_shape, self.batch_size)
        # make copy of prestate Q-values as targets
        targets = preq.asnumpyarray()
        # clip rewards between -1 and 1
        rewards = np.clip(rewards, self.min_reward, self.max_reward)
        # update Q-value targets for each state only at actions taken
        for i, action in enumerate(actions):
            if terminals[i]:
                targets[action, i] = float(rewards[i])
            else:
                targets[action, i] = float(rewards[i]) + self.discount_rate * maxpostq[0,i]
        # copy targets to GPU memory
        self.targets.set(targets)
        # calculate errors
        errors = self.cost_func.get_errors(preq, self.targets)
        assert errors.shape == (self.output_shape, self.batch_size)
        # average error where there is a error (should be 1 in every row)
        #TODO: errors_avg = np.sum(errors)/np.size(errors[errors>0.])
        # clip errors
        if self.clip_error:
            self.be.clip(errors, -self.clip_error, self.clip_error, out = errors)
        # calculate cost, just in case
        cost = self.cost_func.get_cost(preq, self.targets)
        assert cost.shape == (1,1)
        # perform back-propagation of gradients
        self.model.bprop(errors)
        # perform optimization
        self.optimizer.optimize(self.model.layers_to_optimize, epoch)
        # increase number of weight updates (needed for target clone interval)
        self.update_iterations += 1
        if self.target_update_frequency and self.update_iterations % self.target_update_frequency == 0:
            self._copy_theta()
            _logger.info("Network update #%d: Cost = %s, Avg Max Q-value = %s" % (self.update_iterations, str(cost.asnumpyarray()[0][0]), str(maxpostq_avg)))
        # update statistics
        if self.callback:
            self.callback.from_learner(cost.asnumpyarray()[0,0], maxpostq_avg)

    def get_Q(self, state):
        """ Calculates the Q-values for one mini-batch.

        Args:
            state(numpy.ndarray): Single state, shape=(sequence_length,frame_width,frame_height).

        Returns:
            q_values (numpy.ndarray): Results for first element of mini-batch from one forward pass through the network, shape=(self.output_shape,)
        """
        _logger.debug("State shape = %s" % str(state.shape))
        # minibatch is full size, because Neon doesn't let you change the minibatch size
        # so we need to run 32 forward steps to get the one we actually want
        self.dummy_batch[0] = state
        states = self.dummy_batch
        assert states.shape == ((self.batch_size, self.sequence_length,) + self.frame_dims)
        # calculate Q-values for the states
        self._prepare_network_input(states)
        qvalues = self.model.fprop(self.input, inference = True)
        assert qvalues.shape == (self.output_shape, self.batch_size)
        _logger.debug("Qvalues: %s" % (str(qvalues.asnumpyarray()[:,0])))
        return qvalues.asnumpyarray()[:,0]

    def _copy_theta(self):
        """ Copies the weights of the current network to the target network. """
        _logger.debug("Copying weights")
        pdict = self.model.get_description(get_weights=True, keep_states=True)
        self.target_model.deserialize(pdict, load_states=True)

    def save_weights(self, target_dir, epoch):
        """ Saves the current network parameters to disk.

        Args:
            target_dir (str): Directory where the network parameters are stored for each episode.
            epoch (int): Current epoch.
        """
        filename = "%s_%s_%s_%d.prm" % (str(self.args.game.lower()), str(self.args.net_type.lower()), str(self.args.optimizer.lower()), (epoch + 1))
        self.model.save_params(os.path.join(target_dir, filename))

    def load_weights(self, source_file):
        """ Loads the network parameters from a given file.

        Args:
            source_file (str): Complete path to a file with network parameters.
        """
        self.model.load_params(source_file)
Author: maurolopes, Project: deepatari, Lines: 104, Source: dqnneon.py

Example 11: test_model_serialize

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
             Pooling(2),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    path2 = [Dropout(keep=0.5),
             Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())]
    layers = [MergeConcat([path1, path2]),
              Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              BatchNorm(),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
Author: rupertsmall, Project: neon, Lines: 77, Source: test_model.py

Example 12: ModelRunnerNeon

# Required import: from neon.models import Model [as alias]
# Or: from neon.models.Model import bprop [as alias]

#......... part of the code omitted here .........
        initializer = self.get_initializer(input_size = 7 * 7 * 64)
        layers.append(Affine(nout=512, init=initializer, bias=initializer, activation=Rectlin()))
        
        initializer = self.get_initializer(input_size = 512)
        layers.append(Affine(nout=max_action_no, init=initializer, bias=initializer))
        
        return layers        
        
    def clip_reward(self, reward):
        if reward > self.args.clip_reward_high:
            return self.args.clip_reward_high
        elif reward < self.args.clip_reward_low:
            return self.args.clip_reward_low
        else:
            return reward

    def set_input(self, data):
        if self.use_gpu_replay_mem:
            self.be.copy_transpose(data, self.input_uint8, axes=(1, 2, 3, 0))
            self.input[:] = self.input_uint8 / 255
        else:
            self.input.set(data.transpose(1, 2, 3, 0).copy())
            self.be.divide(self.input, 255, self.input)

    def predict(self, history_buffer):
        self.set_input(history_buffer)
        output  = self.train_net.fprop(self.input, inference=True)
        return output.T.asnumpyarray()[0]            

    def print_weights(self):
        pass

    def train(self, minibatch, replay_memory, learning_rate, debug):
        if self.args.prioritized_replay == True:
            prestates, actions, rewards, poststates, terminals, replay_indexes, heap_indexes, weights = minibatch
        else:
            prestates, actions, rewards, poststates, terminals = minibatch
        
        # Get Q*(s, a) with targetNet
        self.set_input(poststates)
        post_qvalue = self.target_net.fprop(self.input, inference=True).T.asnumpyarray()
        
        if self.args.double_dqn == True:
            # Get Q*(s, a) with trainNet
            post_qvalue2 = self.train_net.fprop(self.input, inference=True).T.asnumpyarray()
        
        # Get Q(s, a) with trainNet
        self.set_input(prestates)
        pre_qvalue = self.train_net.fprop(self.input, inference=False)
        
        label = pre_qvalue.asnumpyarray().copy()
        for i in range(0, self.train_batch_size):
            if self.args.clip_reward:
                reward = self.clip_reward(rewards[i])
            else:
                reward = rewards[i]
            if terminals[i]:
                label[actions[i], i] = reward
            else:
                if self.args.double_dqn == True:
                    max_index = np.argmax(post_qvalue2[i])
                    label[actions[i], i] = reward + self.discount_factor* post_qvalue[i][max_index]
                else:
                    label[actions[i], i] = reward + self.discount_factor* np.max(post_qvalue[i])

        # copy targets to GPU memory
        self.targets.set(label)
    
        delta = self.cost.get_errors(pre_qvalue, self.targets)
        
        if self.args.prioritized_replay == True:
            delta_value = delta.asnumpyarray()
            for i in range(self.train_batch_size):
                if debug:
                    print 'weight[%s]: %.5f, delta: %.5f, newDelta: %.5f' % (i, weights[i], delta_value[actions[i], i], weights[i] * delta_value[actions[i], i]) 
                replay_memory.update_td(heap_indexes[i], abs(delta_value[actions[i], i]))
                delta_value[actions[i], i] = weights[i] * delta_value[actions[i], i]
            delta.set(delta_value.copy())
          
        if self.args.clip_loss:
            self.be.clip(delta, -1.0, 1.0, out = delta)
                
        self.train_net.bprop(delta)
        self.optimizer.optimize(self.train_net.layers_to_optimize, epoch=0)

    def update_model(self):
        # have to serialize also states for batch normalization to work
        pdict = self.train_net.get_description(get_weights=True, keep_states=True)
        self.target_net.deserialize(pdict, load_states=True)
        #print ('Updated target model')

    def finish_train(self):
        self.running = False
    
    def load(self, file_name):
        self.train_net.load_params(file_name)
        self.update_model()
        
    def save(self, file_name):
        self.train_net.save_params(file_name)
Author: only4hj, Project: DeepRL, Lines: 104, Source: model_neon.py


Note: The neon.models.Model.bprop method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.