This page collects typical usage examples of the Python function theano.tensor.nnet.softmax. If you are wondering what softmax does, how to call it, or what real uses of it look like, the curated examples below should help.
Fifteen code examples of the softmax function are shown below, sorted by popularity by default.
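A note before the examples: theano.tensor.nnet.softmax normalises each row of a 2-d input, so it is usually fed a (batch, classes) matrix of scores and returns a matrix of row-wise probabilities. A minimal, self-contained sketch, assuming only that Theano and NumPy are installed (the names x, f and scores are illustrative):

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

x = T.matrix('x')                      # one row of scores per example
f = theano.function([x], softmax(x))   # row-wise softmax

scores = numpy.array([[1.0, 2.0, 3.0],
                      [0.0, 0.0, 0.0]], dtype=theano.config.floatX)
probs = f(scores)
print(probs)                # each row is a probability distribution
print(probs.sum(axis=1))    # approximately [1., 1.]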
Example 1: bench_ConvLarge
def bench_ConvLarge(batchsize, variant=True):
name = "ConvLarge_b" + str(GlobalBenchReporter.batch_size)
name += "_" + config.linker
# Image shape 256x256
GlobalBenchReporter.batch_size = batchsize
data_x.set_value(randn(n_examples, 1, 256, 256))
w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
b0 = shared(zeros(6))
w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
b1 = shared(zeros(16))
vv = shared(rand(16 * 11 * 11, 120) * numpy.sqrt(6.0 / 16. / 25))
cc = shared(zeros(120))
v = shared(zeros(120, outputs))
c = shared(zeros(outputs))
params = [w0, b0, w1, b1, v, c, vv, cc]
c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 256, 256),
filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
# this is not the correct leNet5 model, but it's closer to
s0 = tanh(max_pool_2d(c0, (5, 5)))
c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
s1 = tanh(max_pool_2d(c1, (4, 4)))
p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
cost = nll.mean()
gparams = grad(cost, params)
train = function([si, nsi], cost,
updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)],
name=name)
GlobalBenchReporter.eval_model(train, name)
if not variant:
return
# Versions with no inputs
snsi.set_value(GlobalBenchReporter.batch_size)
c0 = tanh(conv2d(ssx, w0, image_shape=(batchsize, 1, 256, 256),
filter_shape=(6, 1, 7, 7)) + b0.dimshuffle(0, 'x', 'x'))
# this is not the correct leNet5 model, but it's closer to
s0 = tanh(max_pool_2d(c0, (5, 5)))
c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 50, 50),
filter_shape=(16, 6, 7, 7)) + b1.dimshuffle(0, 'x', 'x'))
s1 = tanh(max_pool_2d(c1, (4, 4)))
p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv) + cc), v) + c)
nll = -log(p_y_given_x)[arange(ssy.shape[0]), ssy]
cost = nll.mean()
gparams = grad(cost, params)
train2 = function([], cost,
updates=[(p, p - lr * gp) for p, gp in zip(params, gparams)] + [(ssi, ssi + snsi)],
name=name)
GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
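Example 1 (and most of the benchmarks below) computes its loss with the idiom -log(p_y_given_x)[arange(sy.shape[0]), sy]: the advanced index picks, for each row i, the predicted probability of that row's true class sy[i], so the mean of the result is the average negative log-likelihood (cross-entropy against one-hot targets). A stripped-down, self-contained version of just this piece (the variable names are illustrative, not the benchmark's module-level globals):

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

x = T.matrix('x')       # unnormalised scores, one row per example
y = T.lvector('y')      # integer class labels

p_y_given_x = softmax(x)
# advanced indexing: element (i, y[i]) for every row i
nll = -T.log(p_y_given_x)[T.arange(y.shape[0]), y]
cost = nll.mean()

f = theano.function([x, y], cost)
print(f(numpy.random.randn(4, 10).astype(theano.config.floatX),
        numpy.array([1, 0, 3, 9], dtype='int64')))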
Example 2: test_optimize_xent_vector3
def test_optimize_xent_vector3(self):
# Same as test_optimize_xent_vector2, but y is the result of
# a "flatten", and it used to make the constant-folding
# of arange(y.shape[0]) happen before the xent optimization
verbose = 0
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(5).astype(config.floatX)
b_val = rng.randn(5).astype(config.floatX)
y_val = numpy.asarray([2])
x = T.vector('x')
b = T.vector('b')
y_ = T.lvector('y_')
y = y_.flatten()
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode)
if verbose:
printing.debugprint(f)
try:
ops = [node.op for node in f.maker.fgraph.toposort()]
# [big_op, sum, dim_shuffle, flatten]
assert len(ops) <= 4
assert crossentropy_softmax_argmax_1hot_with_bias in ops
assert not [1 for o in ops
if isinstance(o, T.AdvancedSubtensor)]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
if verbose:
printing.debugprint(g)
try:
ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) <= 6
assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax_with_bias in ops
assert softmax_grad not in ops
g(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(g)
raise
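Examples 2, 3, 9 and 13 all exercise the same Theano graph optimisation: in FAST_RUN mode the compiler is expected to replace the softmax / log / advanced-indexing / sum pattern with the fused crossentropy_softmax_argmax_1hot_with_bias op, and the gradient with crossentropy_softmax_1hot_with_bias_dx. A rough way to inspect what a compiled function actually contains, assuming a standard Theano install (the exact op list depends on the Theano version and mode):

import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

x = T.matrix('x')
b = T.vector('b')
y = T.lvector('y')

expr = -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y])
f = theano.function([x, b, y], expr, mode='FAST_RUN')

# Print the ops left after optimisation; with the rewrite applied, a fused
# cross-entropy op should appear instead of separate softmax/log/index nodes.
for node in f.maker.fgraph.toposort():
    print(node.op)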
Example 3: test_optimize_xent_vector2
def test_optimize_xent_vector2(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(5)
b_val = rng.randn(5)
y_val = numpy.asarray([2])
x = T.dvector('x')
b = T.dvector('b')
y = T.lvector('y')
def print_graph(func):
for i, node in enumerate(func.maker.fgraph.toposort()):
print(i, node)
# Last node should be the output
print(i, printing.pprint(node.outputs[0]))
print()
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
if verbose:
print_graph(g)
try:
ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) <= 6
assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax_with_bias in ops
assert softmax_grad not in ops
g(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(g)
raise
Example 4: set_inpt
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape((mini_batch_size, self.n_in))
self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
self.y_out = T.argmax(self.output, axis=1)
self.inpt_dropout = dropout_layer(
inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
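Examples 4, 6, 7 and 8 are variants of the same softmax output layer with dropout: at prediction time the pre-activation is scaled by (1 - p_dropout) because each input unit was dropped with probability p_dropout during training, which keeps the expected input to the layer unchanged. The dropout_layer helper is not shown on this page; a plausible minimal implementation, assuming Theano's shared random streams (a sketch, not necessarily the original code):

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

def dropout_layer(layer, p_dropout, seed=1234):
    """Zero each element of `layer` with probability p_dropout (training-time mask)."""
    srng = RandomStreams(seed)
    mask = srng.binomial(n=1, p=1 - p_dropout, size=layer.shape)
    # cast the integer mask back to floatX so the product stays a float tensor
    return layer * T.cast(mask, theano.config.floatX)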
Example 5: bench_deep1000
def bench_deep1000(variant=True):
name = "mlp_784_1000_1000_1000_10_b" + str(GlobalBenchReporter.batch_size)
name += "_" + config.linker
w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)), name='w0')
b0 = shared(zeros(1000), name='b0')
w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w1')
b1 = shared(zeros(1000), name='b1')
w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)), name='w2')
b2 = shared(zeros(1000), name='b2')
v = shared(zeros(1000, outputs), name='v')
c = shared(zeros(outputs), name='c')
if GlobalBenchReporter.batch_size == 1:
sx_ = sx.flatten()
sy_ = specify_shape(sy, [1])
ssx_ = ssx.flatten()
ssy_ = specify_shape(ssy, [1])
else:
sx_ = sx
sy_ = sy
ssx_ = ssx
ssy_ = ssy
params = [w0, b0, w1, b1, w2, b2, v, c]
h0 = tanh(dot(sx_, w0) + b0)
h1 = tanh(dot(h0, w1) + b1)
h2 = tanh(dot(h1, w2) + b2)
p_y_given_x = softmax(dot(h2, v) + c)
nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
cost = nll.mean()
gparams = grad(cost, params)
train = function([si, nsi], cost,
updates=[(p, p - lr * gp)
for p, gp in zip(params, gparams)],
name=name)
GlobalBenchReporter.eval_model(train, name)
if not variant:
return
# Version with no inputs
h0 = tanh(dot(ssx_, w0) + b0)
h1 = tanh(dot(h0, w1) + b1)
h2 = tanh(dot(h1, w2) + b2)
p_y_given_x = softmax(dot(h2, v) + c)
nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
cost = nll.mean()
gparams = grad(cost, params)
train2 = function([], cost,
updates=[(p, p - lr * gp)
for p, gp in zip(params, gparams)] + [(ssi, ssi + snsi)],
name=name)
snsi.set_value(GlobalBenchReporter.batch_size)
GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
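The rand(...) * numpy.sqrt(6 / (fan_in + fan_out)) pattern in Examples 5 and 10 looks like the Glorot/Xavier uniform initialisation, assuming the rand helper (not shown) draws uniformly from (-1, 1). Note also that 6 / (inputs + 1000) relies on true division; under Python 2 without a __future__ import it would truncate to zero. A hypothetical helper that makes the intent explicit:

import numpy

def glorot_uniform(n_in, n_out, rng=numpy.random):
    """W ~ U(-limit, limit) with limit = sqrt(6 / (n_in + n_out))."""
    limit = numpy.sqrt(6.0 / (n_in + n_out))
    return rng.uniform(-limit, limit, size=(n_in, n_out))

w0 = glorot_uniform(784, 1000)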
Example 6: set_inpt
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape((mini_batch_size, self.n_in))
self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
print(type(self.inpt), type(self.w), type(self.output))
candidates = theano.shared(np.asarray(range(0, 2), dtype=theano.config.floatX), borrow=True)
# self.y_out = T.argmax(self.output, axis=1)
self.y_out = T.dot(self.output, candidates)
self.inpt_dropout = dropout_layer(
inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
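Example 6 replaces the usual argmax prediction with an expectation: candidates holds the class values (0 and 1 here) and T.dot(self.output, candidates) is the probability-weighted average of those values, giving a continuous score instead of a hard label. The same computation in plain NumPy, with made-up numbers:

import numpy
probs = numpy.array([[0.8, 0.2],
                     [0.3, 0.7]])
candidates = numpy.array([0.0, 1.0])
print(probs.dot(candidates))   # [0.2, 0.7]: expected class value per example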
Example 7: set_inpt
def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
self.inpt = inpt.reshape((mini_batch_size, self.n_in))
# Output is masked by 1 - the probability of the dropout layer
self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
self.y_out = T.argmax(self.output, axis=1)
# There is dropout in the output
self.inpt_dropout = CNN.core_layers.DropoutLayer.dropout_layer(
inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
Example 8: output
def output(self, inpt, inpt_dropout, mini_batch_size):
""" Generate output from a particular inpt, given the weights and biases
An observation: inpt (w/o dropout) is used to feedforward to get the result.
On the other hand, inpt_dropout is mainly used for training """
self.inpt = inpt.reshape((mini_batch_size, self.n_in))
self.output = softmax((1-self.dropout)*T.dot(self.inpt, self.W) + self.b)
self.y_out = T.argmax(self.output, axis=1)
self.inpt_dropout = dropout_layer(
inpt_dropout.reshape((mini_batch_size, self.n_in)), self.dropout)
self.output_dropout = softmax(T.dot(self.inpt_dropout, self.W) + self.b)
Example 9: test_optimize_xent_vector
def test_optimize_xent_vector(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(5).astype(config.floatX)
y_val = numpy.asarray([2])
x = T.vector('x')
y = T.lvector('y')
def print_graph(func):
for i, node in enumerate(func.maker.fgraph.toposort()):
print(i, node)
# Last node should be the output
print(i, printing.pprint(node.outputs[0]))
print()
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions:
f = theano.function([x, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
ops = [node.op for node in f.maker.fgraph.toposort()]
assert len(ops) == 5
assert crossentropy_softmax_argmax_1hot_with_bias in ops
assert not [1 for o in ops
if isinstance(o, T.AdvancedSubtensor)]
f(x_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
print_graph(g)
try:
ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) == 4
assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops
assert softmax_grad not in ops
g(x_val, y_val)
except Exception:
theano.printing.debugprint(g)
raise
Example 10: bench_mlp_500
def bench_mlp_500(variant=True):
name = "mlp_784_500_10_b" + str(GlobalBenchReporter.batch_size)
name += "_" + config.linker
HUs = 500
w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)), name='w')
b = shared(zeros(HUs), name='b')
v = shared(zeros(outputs, HUs), name='v')
c = shared(zeros(outputs), name='c')
if GlobalBenchReporter.batch_size == 1:
sx_ = sx.flatten()
sy_ = specify_shape(sy, [1])
ssx_ = ssx.flatten()
ssy_ = specify_shape(ssy, [1])
else:
sx_ = sx
sy_ = sy
ssx_ = ssx
ssy_ = ssy
p_y_given_x = softmax(dot(tanh(dot(sx_, w.T) + b), v.T) + c)
nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
cost = nll.mean()
gw, gb, gv, gc = grad(cost, [w, b, v, c])
train = function([si, nsi], cost,
updates={w: w - lr * gw,
b: b - lr * gb,
v: v - lr * gv,
c: c - lr * gc},
name=name)
GlobalBenchReporter.eval_model(train, name)
if not variant:
return
# Version with no inputs
snsi.set_value(GlobalBenchReporter.batch_size)
p_y_given_x = softmax(dot(tanh(dot(ssx_, w.T) + b), v.T) + c)
nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
cost = nll.mean()
gw, gb, gv, gc = grad(cost, [w, b, v, c])
train2 = function([], cost,
updates={w: w - lr * gw,
b: b - lr * gb,
v: v - lr * gv,
c: c - lr * gc,
ssi: ssi + snsi},
name=name)
GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
Example 11: bench_logreg
def bench_logreg(variant=True):
name = "mlp_784_10_b" + str(GlobalBenchReporter.batch_size)
name += "_" + config.linker
v = shared(zeros(outputs, inputs), name='v')
c = shared(zeros(outputs), name='c')
if GlobalBenchReporter.batch_size == 1:
sx_ = sx.flatten()
sy_ = specify_shape(sy, [1])
ssx_ = ssx.flatten()
ssy_ = specify_shape(ssy, [1])
else:
sx_ = sx
sy_ = sy
ssx_ = ssx
ssy_ = ssy
#
# Note on the transposed-ness of v: for some reason, this data
# layout is faster than the non-transposed orientation.
# The change doesn't make much difference in the deeper models,
# but in this case it was more than twice as fast.
#
p_y_given_x = softmax(dot(sx_, v.T) + c)
nll = -log(p_y_given_x)[arange(sy_.shape[0]), sy_]
cost = nll.mean()
gv, gc = grad(cost, [v, c])
#theano.printing.debugprint(grad(cost, [v, c]), file=open('foo', 'wb'))
train = function([si, nsi], [],
updates={v: v - lr * gv, c: c - lr * gc},
name=name)
# theano.printing.debugprint(train, print_type=True)
GlobalBenchReporter.eval_model(train, name)
if not variant:
return
# Version with no inputs
snsi.set_value(GlobalBenchReporter.batch_size)
p_y_given_x = softmax(dot(ssx_, v.T) + c)
nll = -log(p_y_given_x)[arange(ssy_.shape[0]), ssy_]
cost = nll.mean()
gv, gc = grad(cost, [v, c])
train2 = function([], [],
updates={v: v - lr * gv, c: c - lr * gc,
ssi: ssi + snsi},
name=name)
GlobalBenchReporter.bypass_eval_model(train2, name, init_to_zero=ssi)
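The benchmark functions on this page lean on module-level globals (sx, sy, si, nsi, lr, ssi, GlobalBenchReporter, ...) that are defined elsewhere and not shown. For orientation, a self-contained softmax (multinomial logistic) regression in the same spirit could look like the sketch below; the shapes, learning rate and random data are made up for illustration:

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

rng = numpy.random.RandomState(0)
n_in, n_out, lr = 20, 5, 0.1

x = T.matrix('x')
y = T.lvector('y')
W = theano.shared(numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros(n_out, dtype=theano.config.floatX), name='b')

p_y_given_x = softmax(T.dot(x, W) + b)
nll = -T.log(p_y_given_x)[T.arange(y.shape[0]), y]
cost = nll.mean()
gW, gb = T.grad(cost, [W, b])

train = theano.function([x, y], cost,
                        updates=[(W, W - lr * gW), (b, b - lr * gb)])

X = rng.randn(100, n_in).astype(theano.config.floatX)
Y = rng.randint(0, n_out, size=100).astype('int64')
for epoch in range(10):
    print(epoch, train(X, Y))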
Example 12: inner
def inner(mean, var):
# Generate samples of the distribution.
samples = rng.normal(size=mean.shape)
std = T.sqrt(var)
samples = samples * std + mean
if axis == 1:
result = softmax(samples) # XXX
result.name = 'susp1'
if axis == 2:
samples_flat = samples.reshape((samples.shape[0] * samples.shape[1], samples.shape[2]))
result_flat = softmax(samples_flat)
result = result_flat.reshape(samples.shape)
return result, T.zeros_like(var)
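Example 12 uses the standard trick for taking a softmax over the last axis of a 3-d tensor: Theano's softmax only normalises the rows of a matrix, so the tensor is flattened to (d0 * d1, d2), softmax is applied, and the result is reshaped back. A self-contained sketch of just that pattern (the tensor name and sizes are illustrative):

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

t = T.tensor3('t')                      # shape (d0, d1, d2)
flat = t.reshape((t.shape[0] * t.shape[1], t.shape[2]))
out = softmax(flat).reshape(t.shape)    # softmax over the last axis

f = theano.function([t], out)
sample = numpy.random.randn(2, 3, 4).astype(theano.config.floatX)
print(f(sample).sum(axis=-1))           # all ones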
Example 13: test_xent_thing_int32
def test_xent_thing_int32(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3, 5).astype(config.floatX)
y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.matrix('x')
y = T.lvector('y')
yi = T.cast(y, 'int32')
expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
]
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
ops = [node.op for node in f.maker.fgraph.toposort()]
assert len(ops) == 5
assert crossentropy_softmax_argmax_1hot_with_bias in ops
assert not [1 for o in ops
if isinstance(o, T.AdvancedSubtensor)]
f(x_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
# Also verify the gradient wrt x
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
ops = [node.op for node in g.maker.fgraph.toposort()]
assert len(ops) == 5
assert crossentropy_softmax_1hot_with_bias_dx in ops
assert softmax in ops
assert softmax_grad not in ops
g(x_val, y_val)
except Exception:
theano.printing.debugprint(g)
raise
Example 14: bench_ConvMed
def bench_ConvMed(batchsize):
data_x.set_value(randn(n_examples, 1, 96, 96))
w0 = shared(rand(6, 1, 7, 7) * numpy.sqrt(6 / (25.)))
b0 = shared(zeros(6))
w1 = shared(rand(16, 6, 7, 7) * numpy.sqrt(6 / (25.)))
b1 = shared(zeros(16))
vv = shared(rand(16*8*8, 120) * numpy.sqrt(6.0/16./25))
cc = shared(zeros(120))
v = shared(zeros(120, outputs))
c = shared(zeros(outputs))
params = [w0, b0, w1, b1, v, c, vv, cc]
c0 = tanh(conv2d(sx, w0, image_shape=(batchsize, 1, 96, 96), filter_shape=(6,1,7,7)) + b0.dimshuffle(0, 'x', 'x'))
s0 = tanh(max_pool_2d(c0, (3,3))) # this is not the correct leNet5 model, but it's closer to
c1 = tanh(conv2d(s0, w1, image_shape=(batchsize, 6, 30, 30), filter_shape=(16,6,7,7)) + b1.dimshuffle(0, 'x', 'x'))
s1 = tanh(max_pool_2d(c1, (3,3)))
p_y_given_x = softmax(dot(tanh(dot(s1.flatten(2), vv)+cc), v)+c)
nll = -log(p_y_given_x)[arange(sy.shape[0]), sy]
cost = nll.mean()
gparams = grad(cost, params)
train = function([si, nsi], cost,
updates=[(p,p-lr*gp) for p,gp in zip(params, gparams)])
eval_and_report(train, "ConvMed", [batchsize], N=120)
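The hard-coded 16 * 8 * 8 here (and 16 * 11 * 11 in Example 1) follows from the usual 'valid' convolution and non-overlapping pooling arithmetic: 96 -> 96 - 7 + 1 = 90 -> 90 / 3 = 30 -> 30 - 7 + 1 = 24 -> 24 / 3 = 8 for ConvMed, and 256 -> 250 -> 250 / 5 = 50 -> 50 - 7 + 1 = 44 -> 44 / 4 = 11 for ConvLarge. A small hypothetical helper that documents the calculation:

def conv_pool_out(size, filt, pool):
    """'valid' convolution followed by non-overlapping max pooling."""
    return (size - filt + 1) // pool

print(conv_pool_out(conv_pool_out(96, 7, 3), 7, 3))     # 8  -> 16 * 8 * 8
print(conv_pool_out(conv_pool_out(256, 7, 5), 7, 4))    # 11 -> 16 * 11 * 11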
Example 15: lstm_output
def lstm_output(self, y_prev, ch_prev):
"""calculates info to pass to next time step.
ch_prev is a vector of size 2*hdim"""
c_prev = ch_prev[:self.hdim]#T.vector('c_prev')
h_prev = ch_prev[self.hdim:]#T.vector('h_prev')
# gates (input, forget, output)
i_t = sigmoid(T.dot(self.Ui, h_prev))
f_t = sigmoid(T.dot(self.Uf, h_prev))
o_t = sigmoid(T.dot(self.Uo, h_prev))
# new memory cell
c_new_t = T.tanh(T.dot(self.Uc, h_prev))
# final memory cell
c_t = f_t * c_prev + i_t * c_new_t
# final hidden state
h_t = o_t * T.tanh(c_t)
# Input vector for softmax
theta_t = T.dot(self.U, h_t) + self.b
# Softmax prob vector
y_hat_t = softmax(theta_t.T).T
# Theano's softmax always returns a 2-d result (a 1 x N row here, not a
# 1-d vector), so the outer .T turns it back into a column.
# y_hat_t = y_hat_t[0]
# Compute new cost
out_label = T.argmax(y_hat_t)
# final joint state
ch_t = T.concatenate([c_t, h_t])
return (out_label, ch_t), scan_module.until(T.eq(out_label, self.out_end))
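As the comments in Example 15 observe, Theano's softmax always returns a 2-d result: feeding it a 1-d vector yields a 1 x N row matrix rather than a 1-d probability vector (T.argmax still works because it flattens by default). A quick shape check, assuming only Theano and NumPy (the numbers are illustrative):

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import softmax

v = T.vector('v')
f = theano.function([v], softmax(v))
out = f(numpy.array([1.0, 2.0, 3.0], dtype=theano.config.floatX))
print(out.shape)   # (1, 3): a row matrix, hence the commented-out y_hat_t[0] work-around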