本文整理汇总了Python中neon.models.Model.load_weights方法的典型用法代码示例。如果您正苦于以下问题:Python Model.load_weights方法的具体用法?Python Model.load_weights怎么用?Python Model.load_weights使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类neon.models.Model的用法示例。
在下文中一共展示了Model.load_weights方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_model_serialize
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
def test_model_serialize(backend_default, data):
    """Check that a trained model survives a serialize / load_weights round trip.

    A small multistream network is trained for a few batches, its inference
    outputs and per-layer serialized parameters are recorded, the model is
    written to a temporary pickle, loaded into a fresh ``Model`` built from
    the same layer list, and the outputs / states / params are compared.

    Args:
        backend_default: backend fixture (not referenced directly in the body).
        data: path handed to ``load_mnist``.
    """
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)
    train_set = DataIterator(
        [X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)

    n_test = 3
    num_epochs = 3

    def _inference_outputs(model):
        # Collect inference outputs for the leading batches of train_set.
        # The loop stops after the batch with index n_test + 1 has been run.
        collected = []
        for i, (x, t) in enumerate(train_set):
            collected.append(model.fprop(x, inference=True))
            if i > n_test:
                break
        return collected

    # Train model for num_epochs, stopping each epoch after batch n_test + 1
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected states of all layers, then the expected outputs
    pdicts_exp = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
    outputs_exp = _inference_outputs(mlp)

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)
    try:
        # Load model
        mlp = Model(layers=layers)
        mlp.load_weights(tmp_save)

        pdicts = [layer.get_params_serialize() for layer in mlp.layers_to_optimize]
        outputs = _inference_outputs(mlp)

        # Check outputs, states, and params are the same
        for output, output_exp in zip(outputs, outputs_exp):
            assert np.allclose(output.get(), output_exp.get())

        for pd, pd_exp in zip(pdicts, pdicts_exp):
            for s, s_e in zip(pd['states'], pd_exp['states']):
                if isinstance(s, list):  # this is the batch norm case
                    for _s, _s_e in zip(s, s_e):
                        assert np.allclose(_s, _s_e)
                else:
                    assert np.allclose(s, s_e)
            for p, p_e in zip(pd['params'], pd_exp['params']):
                assert type(p) == type(p_e)
                if isinstance(p, list):  # this is the batch norm case
                    for _p, _p_e in zip(p, p_e):
                        assert np.allclose(_p, _p_e)
                elif isinstance(p, np.ndarray):
                    assert np.allclose(p, p_e)
                else:
                    assert p == p_e
    finally:
        # Remove the temporary pickle even when one of the checks above fails.
        os.remove(tmp_save)
示例2: GeneralizedCost
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
# Tail of the convolutional stack.  `layers`, `init_uni`, `relu`, `args`,
# `train`, `test` and `opt_gdm` come from the part of the script that is not
# shown in this excerpt.
layers.extend([
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1),
    Conv((1, 1, 384), init=init_uni, activation=relu, strides=1),
    Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1),  # 12->6
    Dropout(keep=0.5),
    Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1),
    Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1),
    Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1),
    Pooling(6, op='avg'),
    Activation(Softmax()),
])

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

# Optionally start from previously saved weights.
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
if args.deconv:
    callbacks.add_deconv_callback(train, test)

mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# Shut down both batch providers once training has finished.
test.exit_batch_provider()
train.exit_batch_provider()
示例3: Sequential
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
# Model initialization: an image branch and a sentence branch are merged and
# fed through dropout, an LSTM and a softmax output layer.  `hidden_size`,
# `init`, `init2`, `train_set`, `num_epochs`, `args` and `data_path` are
# defined in the part of the script not shown here.
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()),
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)
callbacks = Callbacks(model, train_set, args)
opt = RMSProp(
    decay_rate=0.997,
    learning_rate=0.0005,
    epsilon=1e-8,
    clip_gradients=True,
    gradient_limit=1.0,
)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

# load the checkpointed model (if it exists) and evaluate the BLEU score on
# the test set
model.load_weights(checkpoint_model_path)
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
示例4: LSTM
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
# Rebuild the network for one-step-at-a-time generation and load the trained
# weights.  `hidden`, `init`, `train_set`, `return_sequences`, `args`,
# `time_series`, `seed_seq_len` and `num_predict` come from the part of the
# script not shown in this excerpt.
seq_len = 1

# Both variants share the LSTM and the output Affine layer; RecurrentLast is
# only inserted when the model was not built to return full sequences.
layers = [LSTM(hidden, init, Logistic(), Tanh(), reset_cells=False)]
if return_sequences is not True:
    layers.append(RecurrentLast())
layers.append(Affine(train_set.nfeatures, init, bias=init, activation=Identity()))

model_new = Model(layers=layers)
model_new.load_weights(args.save_path)
model_new.initialize(dataset=(train_set.nfeatures, seq_len))

output = np.zeros((train_set.nfeatures, num_predict))
seed = time_series.train[:seed_seq_len]
x = model_new.be.empty((train_set.nfeatures, seq_len))

# Feed the seed samples one by one; `y` ends up holding the output for the
# last seed sample.
for s_in in seed:
    x.set(s_in.reshape(train_set.nfeatures, seq_len))
    y = model_new.fprop(x, inference=False)

for i in range(num_predict):
    # Take last prediction and feed into next fprop
    pred = y.get()[:, -1]
    output[:, i] = pred
    x[:] = pred.reshape(train_set.nfeatures, seq_len)
    # Without this call `y` never changes and every column of `output`
    # would repeat the same prediction.
    y = model_new.fprop(x, inference=False)
示例5: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
class DeepQNetwork:
def __init__(self, num_actions, args):
    """Create the backend, the online and target models, and reusable tensors.

    Args:
        num_actions: size of the network output; also stored on the instance.
        args: options object.  Attributes read here: backend, batch_size,
            random_seed, device_id, datatype, stochastic_round,
            learning_rate, rmsprop_decay_rate, target_steps,
            save_weights_path (only when target_steps is truthy),
            discount_rate, history_length, screen_height, screen_width
            and clip_error.
    """
    # create Neon backend
    self.be = gen_backend(
        backend=args.backend,
        batch_size=args.batch_size,
        rng_seed=args.random_seed,
        device_id=args.device_id,
        default_dtype=np.dtype(args.datatype).type,
        stochastic_round=args.stochastic_round,
    )

    # create model
    online_layers = self.createLayers(num_actions)
    self.model = Model(layers=online_layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.optimizer = RMSProp(
        learning_rate=args.learning_rate,
        decay_rate=args.rmsprop_decay_rate,
        stochastic_round=args.stochastic_round,
    )

    # create target model: a separate network when target_steps is set,
    # otherwise the online model doubles as the target
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if not self.target_steps:
        self.target_model = self.model
    else:
        self.target_model = Model(layers=self.createLayers(num_actions))
        self.save_weights_path = args.save_weights_path

    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.tensor = self.be.empty(self.input_shape)
    self.tensor.lshape = self.input_shape  # needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    self.callback = None
def createLayers(self, num_actions):
    """Return a fresh list of layers: three Conv layers and two Affine layers.

    Args:
        num_actions: number of units in the final (linear) Affine layer.
    """
    init_norm = Gaussian(loc=0.0, scale=0.01)
    return [
        # First hidden layer: 32 filters of 8x8, stride 4, rectifier.
        Conv((8, 8, 32), strides=4, init=init_norm, activation=Rectlin()),
        # Second hidden layer: 64 filters of 4x4, stride 2, rectifier.
        Conv((4, 4, 64), strides=2, init=init_norm, activation=Rectlin()),
        # Third hidden layer: 64 filters of 3x3, stride 1, rectifier.
        Conv((3, 3, 64), strides=1, init=init_norm, activation=Rectlin()),
        # Final hidden layer: fully connected, 512 rectifier units.
        Affine(nout=512, init=init_norm, activation=Rectlin()),
        # Output layer: fully connected, one output per valid action.
        Affine(nout=num_actions, init=init_norm),
    ]
def setTensor(self, states):
    """Copy `states` into the reusable input tensor and divide it by 255.

    Args:
        states: array with four axes; axis 0 is moved to the last position
            before the copy.
    """
    # change order of axes to match what Neon expects
    reordered = np.transpose(states, axes=(1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.tensor.set(reordered.copy())
    # normalize network input between 0 and 1 (in place)
    self.be.divide(self.tensor, 255, self.tensor)
def train(self, minibatch, epoch):
# expand components of minibatch
prestates, actions, rewards, poststates, terminals = minibatch
assert len(prestates.shape) == 4
assert len(poststates.shape) == 4
assert len(actions.shape) == 1
assert len(rewards.shape) == 1
assert len(terminals.shape) == 1
assert prestates.shape == poststates.shape
assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
if self.target_steps and self.train_iterations % self.target_steps == 0:
# HACK: push something through network, so that weights exist
self.model.fprop(self.tensor)
# HACK: serialize network to disk and read it back to clone
filename = os.path.join(self.save_weights_path, "target_network.pkl")
save_obj(self.model.serialize(keep_states = False), filename)
self.target_model.load_weights(filename)
# feed-forward pass for poststates to get Q-values
self.setTensor(poststates)
postq = self.target_model.fprop(self.tensor, inference = True)
assert postq.shape == (self.num_actions, self.batch_size)
# calculate max Q-value for each poststate
maxpostq = self.be.max(postq, axis=0).asnumpyarray()
assert maxpostq.shape == (1, self.batch_size)
# feed-forward pass for prestates
self.setTensor(prestates)
preq = self.model.fprop(self.tensor, inference = False)
assert preq.shape == (self.num_actions, self.batch_size)
#.........这里部分代码省略.........
示例6: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
#.........这里部分代码省略.........
layers.append(Affine(nout=512, init=init_norm, activation=Rectlin()))
# The output layer is a fully-connected linear layer with a single output for each valid action.
layers.append(Affine(nout=num_actions, init = init_norm))
return layers
def _setInput(self, states):
    """Copy `states` into the reusable input tensor and divide it by 255.

    Args:
        states: array with four axes; axis 0 is moved to the last position
            before the copy.
    """
    # change order of axes to match what Neon expects
    reordered = np.transpose(states, axes=(1, 2, 3, 0))
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(reordered.copy())
    # normalize network input between 0 and 1 (in place)
    self.be.divide(self.input, 255, self.input)
def train(self, minibatch, epoch):
    """Perform one training step on a minibatch of transitions.

    Args:
        minibatch: 5-tuple ``(prestates, actions, rewards, poststates,
            terminals)``; the state arrays must be 4-dimensional and of equal
            shape, the other three 1-dimensional, all with the same leading
            size.
        epoch: forwarded to the optimizer.
    """
    # expand components of minibatch
    prestates, actions, rewards, poststates, terminals = minibatch
    assert len(prestates.shape) == 4
    assert len(poststates.shape) == 4
    assert len(actions.shape) == 1
    assert len(rewards.shape) == 1
    assert len(terminals.shape) == 1
    assert prestates.shape == poststates.shape
    assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]

    # every `target_steps` iterations, copy the online weights into the
    # target model
    if self.target_steps and self.train_iterations % self.target_steps == 0:
        pdict = self.model.get_description(get_weights=True)
        self.target_model.deserialize(pdict, load_states=False)

    # feed-forward pass for poststates to get Q-values
    self._setInput(poststates)
    postq = self.target_model.fprop(self.input, inference=True)
    assert postq.shape == (self.num_actions, self.batch_size)

    # calculate max Q-value for each poststate
    maxpostq = self.be.max(postq, axis=0).asnumpyarray()
    assert maxpostq.shape == (1, self.batch_size)

    # feed-forward pass for prestates
    self._setInput(prestates)
    preq = self.model.fprop(self.input, inference=False)
    assert preq.shape == (self.num_actions, self.batch_size)

    # make copy of prestate Q-values as targets
    targets = preq.asnumpyarray()

    # update Q-value targets for actions taken: the reward alone for terminal
    # transitions, otherwise the reward plus the discounted max post Q-value
    for i, action in enumerate(actions):
        target = float(rewards[i])
        if not terminals[i]:
            target = target + self.discount_rate * maxpostq[0, i]
        targets[action, i] = target

    # copy targets to GPU memory
    self.targets.set(targets)

    # calculate errors
    deltas = self.cost.get_errors(preq, self.targets)
    assert deltas.shape == (self.num_actions, self.batch_size)
    #assert np.count_nonzero(deltas.asnumpyarray()) == 32

    # calculate cost, just in case
    cost = self.cost.get_cost(preq, self.targets)
    assert cost.shape == (1, 1)

    # clip errors
    if self.clip_error:
        self.be.clip(deltas, -self.clip_error, self.clip_error, out=deltas)

    # perform back-propagation of gradients
    self.model.bprop(deltas)

    # perform optimization
    self.optimizer.optimize(self.model.layers_to_optimize, epoch)

    # increase number of weight updates (needed for target clone interval)
    self.train_iterations += 1

    # calculate statistics
    if self.callback:
        self.callback.on_train(cost.asnumpyarray()[0, 0])
def predict(self, states):
    """Return the model's Q-values for a full batch of states.

    Args:
        states: array of shape ``(batch_size, history_length) + screen_dim``.

    Returns:
        The transposed Q-value matrix as a numpy array, so that the batch is
        the first dimension.
    """
    # minibatch is full size, because Neon doesn't let change the minibatch size
    expected_shape = (self.batch_size, self.history_length,) + self.screen_dim
    assert states.shape == expected_shape

    # calculate Q-values for the states
    self._setInput(states)
    qvalues = self.model.fprop(self.input, inference=True)
    assert qvalues.shape == (self.num_actions, self.batch_size)

    # Only pull the values off the backend when debug logging is enabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Q-values: " + str(qvalues.asnumpyarray()[:,0]))

    # transpose the result, so that batch size is first dimension
    return qvalues.T.asnumpyarray()
def load_weights(self, load_path):
    """Load weights for the online model from `load_path`."""
    online_model = self.model
    online_model.load_weights(load_path)
def save_weights(self, save_path):
    """Serialize the online model (states included) and write it to `save_path`."""
    serialized = self.model.serialize(keep_states=True)
    save_obj(serialized, save_path)
示例7: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
class DeepQNetwork:
def __init__(self, state_size, num_steers, num_speeds, args):
    """Create the backend, reusable tensors, the online model and its target.

    Args:
        state_size: length of one state vector (first input dimension).
        num_steers: number of steering outputs.
        num_speeds: number of speed outputs; the network has
            ``num_steers + num_speeds`` outputs in total.
        args: options object.  Attributes read here: hidden_layers,
            hidden_nodes, batch_size, discount_rate, clip_error, backend,
            random_seed, device_id, datatype, stochastic_round, optimizer,
            learning_rate, decay_rate, target_steps and save_weights_prefix
            (the last only when target_steps is truthy).

    Raises:
        AssertionError: if ``args.optimizer`` is not one of 'rmsprop',
            'adam' or 'adadelta'.
    """
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers=self._createLayers())
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        # The original `assert false, ...` raised NameError because `false`
        # is not a Python name.  Raising explicitly gives the intended
        # exception and also keeps the check active under `python -O`.
        raise AssertionError("Unknown optimizer")

    # create target model: a separate network when target_steps is set,
    # otherwise the online model doubles as the target
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self._createLayers())
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model
def _createLayers(self):
    """Return a fresh list of Affine layers for the network.

    The list holds ``self.num_layers`` hidden layers of ``self.hidden_nodes``
    rectifier units, followed by a linear output layer with
    ``self.num_actions`` units.
    """
    init_norm = Gaussian(loc=0.0, scale=0.01)
    # `range` instead of the Python-2-only `xrange`; the index is not needed.
    layers = [
        Affine(nout=self.hidden_nodes, init=init_norm, activation=Rectlin())
        for _ in range(self.num_layers)
    ]
    layers.append(Affine(nout=self.num_actions, init=init_norm))
    return layers
def _setInput(self, states):
    """Copy the transpose of `states` into the reusable input tensor.

    Args:
        states: array that is transposed before being copied into
            ``self.input``.
    """
    # change order of axes to match what Neon expects
    transposed = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(transposed.copy())
    # input normalization is currently disabled:
    #self.be.divide(self.input, 200, self.input)
def train(self, minibatch, epoch = 0):
# expand components of minibatch
prestates, steers, speeds, rewards, poststates, terminals = minibatch
assert len(prestates.shape) == 2
assert len(poststates.shape) == 2
assert len(steers.shape) == 1
assert len(speeds.shape) == 1
assert len(rewards.shape) == 1
assert len(terminals.shape) == 1
assert prestates.shape == poststates.shape
assert prestates.shape[0] == steers.shape[0] == speeds.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
if self.target_steps and self.train_iterations % self.target_steps == 0:
# HACK: serialize network to disk and read it back to clone
filename = self.save_weights_prefix + "_target.pkl"
save_obj(self.model.serialize(keep_states = False), filename)
self.target_model.load_weights(filename)
# feed-forward pass for poststates to get Q-values
self._setInput(poststates)
postq = self.target_model.fprop(self.input, inference = True)
assert postq.shape == (self.num_actions, self.batch_size)
# calculate max Q-value for each poststate
postq = postq.asnumpyarray()
maxsteerq = np.max(postq[:self.num_steers,:], axis=0)
assert maxsteerq.shape == (self.batch_size,), "size: %s" % str(maxsteerq.shape)
maxspeedq = np.max(postq[-self.num_speeds:,:], axis=0)
assert maxspeedq.shape == (self.batch_size,)
#.........这里部分代码省略.........
示例8: __init__
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
class DeepQNetwork:
def __init__(self, state_size, num_actions, args):
    """Create the backend, reusable tensors, the online model and its target.

    Args:
        state_size: length of one state vector (first input dimension).
        num_actions: number of network outputs.
        args: options object.  Attributes read here: batch_size,
            discount_rate, clip_error, backend, random_seed, device_id,
            datatype, stochastic_round, optimizer, learning_rate, decay_rate,
            target_steps and save_weights_prefix (the last only when
            target_steps is truthy).

    Raises:
        AssertionError: if ``args.optimizer`` is not one of 'rmsprop',
            'adam' or 'adadelta'.
    """
    # remember parameters
    self.state_size = state_size
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error
    self.action_count = np.zeros(21)

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        # Raised explicitly (same exception type and message as the former
        # `assert False`) so the check is not stripped under `python -O`,
        # which would leave `self.optimizer` undefined.
        raise AssertionError("Unknown optimizer")

    # create target model: a separate network when target_steps is set,
    # otherwise the online model doubles as the target
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self._createLayers(num_actions))
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model
def _createLayers(self, num_actions):
    """Return a fresh two-layer fully-connected network definition.

    Args:
        num_actions: number of units in the linear output layer.
    """
    init_norm = Gaussian(loc=0.0, scale=0.01)
    return [
        # Hidden layer: fully connected, 64 rectifier units.
        Affine(nout=64, init=init_norm, bias=init_norm, activation=Rectlin()),
        # Output layer: fully connected, one output per valid action.
        Affine(nout=num_actions, init=init_norm, bias=init_norm),
    ]
def _setInput(self, states):
    """Copy the transpose of `states` into the reusable input tensor.

    Args:
        states: array that is transposed before being copied into
            ``self.input``.
    """
    # change order of axes to match what Neon expects
    transposed = np.transpose(states)
    # copy() shouldn't be necessary here, but Neon doesn't work otherwise
    self.input.set(transposed.copy())
    # input normalization is currently disabled:
    # self.be.divide(self.input, 255, self.input)
def train(self, minibatch, epoch):
# expand components of minibatch
prestates, actions, speed_actions, rewards, poststates, terminals = minibatch
assert len(prestates.shape) == 2
assert len(poststates.shape) == 2
assert len(actions.shape) == 1
assert len(rewards.shape) == 1
assert len(terminals.shape) == 1
assert prestates.shape == poststates.shape
assert prestates.shape[0] == actions.shape[0] == rewards.shape[0] == poststates.shape[0] == terminals.shape[0]
#print "WE ARE ACTUALLY TRAINING IN HERE"
if self.target_steps and self.train_iterations % self.target_steps == 0:
# HACK: serialize network to disk and read it back to clone
filename = self.save_weights_prefix + "_target.pkl"
save_obj(self.model.serialize(keep_states = False), filename)
self.target_model.load_weights(filename)
# feed-forward pass for poststates to get Q-values
self._setInput(poststates)
postq = self.target_model.fprop(self.input, inference = True)
assert postq.shape == (self.num_actions, self.batch_size)
# calculate max Q-value for each poststate
postq = postq.asnumpyarray()
maxpostq = np.max(postq, axis=0)
#print maxpostq.shape
assert maxpostq.shape == (self.batch_size,)
# feed-forward pass for prestates
self._setInput(prestates)
preq = self.model.fprop(self.input, inference = False)
#.........这里部分代码省略.........
示例9: Dropout
# 需要导入模块: from neon.models import Model [as 别名]
# 或者: from neon.models.Model import load_weights [as 别名]
Dropout(keep=0.5),
Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]
# Evaluate a previously trained model.  `layers`, `clip_gradients`,
# `train_set`, `valid_set` and `args` come from the part of the script not
# shown in this excerpt.
print(layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()

##########################################################################
model = Model(layers=layers)
optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
callbacks = Callbacks(model, train_set, args, eval_set=valid_set)

model.load_weights(os.path.join(args.data_dir, '128128_49_model_e2.pkl'))

# Single-argument print calls: they produce the same text as the former
# Python 2 `print a, b` statements and are also valid on Python 3.
print("%s %s" % ("Test Accuracy - ", 100 * model.eval(valid_set, metric=metric)))
print("%s %s" % ("Train Accuracy - ", 100 * model.eval(train_set, metric=metric)))

# output result directly: show predictions and targets for the first
# validation batch only
for x, y in valid_set:
    x = model.fprop(x, inference=True)
    print(x.get())
    print(y.get())
    break
#########################################################################
# continue training
# optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
# callbacks = Callbacks(model, train_set, args, eval_set=valid_set)