This article collects typical usage examples of the softmax function from Python's nn.math module. If you have been wondering what exactly the softmax function does, how to call it, or what it looks like in real code, the hand-picked examples below should help.
The article presents 15 code examples of the softmax function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
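Every example below calls softmax on a 1-D vector of scores and gets back a probability vector of the same length. As a point of reference, here is a minimal, self-contained sketch of such a function, using the usual max-subtraction trick for numerical stability. This is an assumption about how nn.math.softmax behaves based on how the examples use it, not the library's actual code.

import numpy as np

def softmax(x):
    """Softmax over the last axis of a 1-D or 2-D score array.

    Subtracting the maximum before exponentiating avoids overflow
    and does not change the result.
    """
    x = np.asarray(x, dtype=float)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

if __name__ == "__main__":
    p = softmax(np.array([2.0, 1.0, 0.1]))
    print(p)        # approximately [0.659 0.242 0.099]
    print(p.sum())  # 1.0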
Example 1: compute_seq_ppl
def compute_seq_ppl(self, xs, ys):
    #### YOUR CODE HERE ####
    J = 0
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    cs = zeros((ns, self.cdim))
    # predicted probas
    ps = zeros((ns, self.Udim))
    #### YOUR CODE HERE ####
    L = self.sparams.L
    Lc = self.Lcluster
    cfreq = self.cfreq
    cwords = self.cwords
    direct_size = self.hsize
    U = self.params.U
    H = self.params.H
    C = zeros((self.cdim, self.hdim))
    if self.isCompression is True:
        C = self.params.C
    ##
    # Forward propagation
    for i in xrange(ns):
        hs[i+1] = sigmoid(H.dot(hs[i]) + L[xs[i]])
        #hs[i+1] = 2.0/(1 + exp(-2.0*(H.dot(hs[i]) + L[xs[i]]))) - 1
        # without maximum entropy optimization
        word_cluster = Lc[ys[i]]
        st_word = cwords[word_cluster, 0]
        ed_word = st_word + cfreq[word_cluster]
        part_cluster = zeros((self.class_size, ))
        part_word = zeros((ed_word - st_word, ))
        if self.isME is True:
            if direct_size > 0 and xs[i] != -1:
                part_cluster += self.params.cluster_direct[xs[i]]
                indexs = cwords[word_cluster, 0:int(cfreq[word_cluster])]
                if xs[i] < direct_size:
                    part_word += self.params.word_direct[xs[i], indexs]
        if self.isCompression is True:
            cs[i] = sigmoid(C.dot(hs[i+1]))
            part_cluster += U[self.vdim:].dot(cs[i])
            part_word += U[st_word:ed_word].dot(cs[i])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
        else:
            part_cluster += U[self.vdim:].dot(hs[i+1])
            part_word += U[st_word:ed_word].dot(hs[i+1])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
            #ps[i, self.vdim:] = softmax(U[self.vdim:,:].dot(hs[i+1]))
            #ps[i, st_word:ed_word] = softmax(U[st_word:ed_word,:].dot(hs[i+1]))
        #print maximum(ps[i, ys[st_word:ed_word]]), ps[i,ys[i]], maximum(ps[i, self.vdim:]), ps[i, self.vdim+word_cluster]
        J -= log(ps[i, ys[i]] * ps[i, self.vdim+word_cluster])
    return J
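Example 1 uses a two-level (class-factored) softmax: one softmax over word clusters and one softmax over the words inside the target word's cluster, with the word probability taken as the product of the two, exactly as in the final J update. The sketch below isolates that factorization with made-up shapes and variable names; it is not the class internals above.

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

# hypothetical sizes: 3 clusters, 9 words, hidden dimension 4
rng = np.random.RandomState(0)
h = rng.randn(4)                             # hidden state at one timestep
U_cluster = rng.randn(3, 4)                  # cluster score matrix
U_word = rng.randn(9, 4)                     # word score matrix
word_to_cluster = np.repeat([0, 1, 2], 3)    # words 0-2 -> cluster 0, etc.

target_word = 5
c = word_to_cluster[target_word]                 # the target word's cluster
in_cluster = np.where(word_to_cluster == c)[0]   # words sharing that cluster

p_cluster = softmax(U_cluster.dot(h))            # P(cluster | h)
p_word = softmax(U_word[in_cluster].dot(h))      # P(word | cluster, h)

# P(word) = P(cluster) * P(word | cluster), as in the J update above
prob = p_cluster[c] * p_word[np.where(in_cluster == target_word)[0][0]]
print(prob)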
Example 2: forwardProp
def forwardProp(self, node, correct=[], guess=[]):
    cost = total = 0.0
    # this is exactly the same setup as forwardProp in rnn.py
    if node.isLeaf == True:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1
    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        node.hActs1 = self.ReLU(self.W1.dot(h) + self.b1)
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
Example 3: forwardProp
def forwardProp(self, node, correct, guess):
    cost = total = 0.0
    if node.isLeaf == True:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1
    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        tmp = np.zeros(len(node.left.hActs1))
        for i in range(len(tmp)):
            tmp[i] = h.dot(self.V[i]).dot(h)
        node.hActs1 = self.ReLU(self.W.dot(h) + self.b + tmp)
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
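The inner loop in Example 3 evaluates a bilinear (tensor) term, tmp[i] = h·V[i]·h, one output dimension at a time. Assuming V is a 3-D array of shape (d, 2d, 2d), the same quantity can be computed in one call to np.einsum; a small sketch:

import numpy as np

rng = np.random.RandomState(1)
d = 5
V = rng.randn(d, 2 * d, 2 * d)   # one (2d x 2d) matrix per output dimension
h = rng.randn(2 * d)             # concatenation of the two child vectors

# loop version, as in the example above
tmp_loop = np.array([h.dot(V[i]).dot(h) for i in range(d)])

# vectorized version: tmp[i] = sum_{j,k} h[j] * V[i,j,k] * h[k]
tmp_vec = np.einsum('j,ijk,k->i', h, V, h)

print(np.allclose(tmp_loop, tmp_vec))   # True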
Example 4: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    #print 'windows.shape', windows[0]
    P = []
    for window in windows:
        x = hstack([self.sparams.L[idx] for idx in window])  # extract representation, a (150,) vector
        #x = reshape(x, (x.shape[0]*x.shape[1]))
        #print self.params.W.shape, ' ', x.shape, ' ', self.params.b1.shape
        a = self.params.W.dot(x) + self.params.b1   # (100,150)*(150,) + (100,) => (100,)
        h = tanh(a)                                 # (100,)
        p = softmax(self.params.U.dot(h) + self.params.b2)  # (5,100)*(100,) + (5,) => (5,)
        P.append(p)
    #### END YOUR CODE ####
    return P  # rows are output for each input
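Example 4 concatenates the window's word vectors, applies a tanh hidden layer, and softmaxes the output scores. The following standalone sketch reproduces that embed -> tanh -> softmax pipeline with hypothetical shapes and random parameters (it is not the class above), including the singleton-handling step:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

# hypothetical sizes: vocab 10, embedding dim 50, window 3, hidden 100, 5 classes
rng = np.random.RandomState(2)
L = rng.randn(10, 50)                           # word-vector lookup table
W, b1 = rng.randn(100, 150), np.zeros(100)
U, b2 = rng.randn(5, 100), np.zeros(5)

def predict_proba(windows):
    if not hasattr(windows[0], "__iter__"):     # singleton -> list-of-lists
        windows = [windows]
    P = []
    for window in windows:
        x = np.hstack([L[idx] for idx in window])   # (150,)
        h = np.tanh(W.dot(x) + b1)                  # (100,)
        P.append(softmax(U.dot(h) + b2))            # (5,)
    return np.array(P)

print(predict_proba([3, 1, 4]).shape)                # (1, 5) -- single window
print(predict_proba([[3, 1, 4], [0, 2, 9]]).shape)   # (2, 5)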
Example 5: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    # hs[-1] = initial hidden state (zeros)
    ns = len(ys)
    hs = zeros((ns+1, self.hdim))
    for t in range(ns):
        hs[t] = sigmoid(self.params.H.dot(hs[t-1]) + self.sparams.L[xs[t]])
        #ps[t] = softmax(self.params.U.dot(hs[t]))
        #J -= log(ps[t][ys[t]])
    h_final = hs[ns-1]
    z = self.params.U.dot(h_final)
    y_hat = []
    for i in range(n_aspect):
        current = z[sent_dim*i:sent_dim*(i+1)]
        y_hat.extend(softmax(current))
    J = -sum(ys.reshape(len(ys), 1) * log(array(y_hat).reshape(len(y_hat), 1)))
    #### END YOUR CODE ####
    return J
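Example 5 applies a separate softmax to each contiguous block of the output vector (one block per aspect), rather than one softmax over the whole vector. Assuming hypothetical values for n_aspect and sent_dim, the block-wise step looks like this in isolation:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

n_aspect, sent_dim = 3, 4     # hypothetical: 3 aspects, 4 sentiment classes each
z = np.random.RandomState(3).randn(n_aspect * sent_dim)

y_hat = np.concatenate([softmax(z[sent_dim * i : sent_dim * (i + 1)])
                        for i in range(n_aspect)])

# each 4-way block sums to 1 on its own
print(y_hat.reshape(n_aspect, sent_dim).sum(axis=1))   # [1. 1. 1.]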
Example 6: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format
    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class
    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.U += (your gradient dJ/dU)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    xf = []
    for idx in window:
        xf.extend(self.sparams.L[idx])  # extract representation
    tanhX = tanh(self.params.W.dot(xf) + self.params.b1)
    softmaxP = softmax(self.params.U.dot(tanhX) + self.params.b2)
    y = make_onehot(label, len(softmaxP))
    delta2 = softmaxP - y
    self.grads.U += outer(delta2, tanhX) + self.lreg * self.params.U
    self.grads.b2 += delta2
    delta1 = self.params.U.T.dot(delta2) * (1. - tanhX*tanhX)
    self.grads.W += outer(delta1, xf) + self.lreg * self.params.W
    self.grads.b1 += delta1
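Example 6 relies on the standard identity that, for a softmax output followed by cross-entropy loss, the gradient with respect to the pre-softmax scores is simply p - y; that is what delta2 computes. A quick numerical check of that identity, independent of the class above:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

rng = np.random.RandomState(4)
z = rng.randn(5)          # pre-softmax scores (logits)
label = 2
y = np.eye(5)[label]      # one-hot target

loss = lambda z: -np.log(softmax(z)[label])
analytic = softmax(z) - y                      # the "delta2" of the example

eps = 1e-6
numeric = np.array([(loss(z + eps * np.eye(5)[k]) - loss(z - eps * np.eye(5)[k])) / (2 * eps)
                    for k in range(5)])

print(np.allclose(analytic, numeric, atol=1e-6))   # True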
Example 7: f_prop
def f_prop(self, ys, h_in):
    """Given a series of xs and a series of ys, returns hidden vector at
    end, and also the cost"""
    N = len(ys)  # total num timesteps
    #L = self.params['L']
    Wh = self.params['Wh']
    #Wx = self.params['Wx']
    U = self.params['U']
    b1 = self.params['b1']
    b2 = self.params['b2']
    self.yhats = np.zeros([self.outdim, N])
    self.hs = np.zeros([self.hdim, N+1])
    # np.random.seed(2234)
    # self.hs[:,-1] = np.random.normal(0, .1, (self.hdim))
    self.hs[:,-1] = h_in
    cost = 0
    for t in xrange(N):
        h_prev = self.hs[:,t-1]
        z_1 = np.dot(Wh, h_prev) + b1  #+ np.dot(Wx, Lx)
        h1 = np.maximum(z_1, 0)
        self.hs[:,t] = h1
        yhat = softmax(np.dot(U, h1) + b2)
        self.yhats[:,t] = yhat
        cost += -np.log(yhat[ys[t]])
    return cost
Example 8: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))
    for i in range(ns):
        z1 = self.params.H.dot(hs[i-1]) + self.sparams.L[xs[i]]
        hs[i] = sigmoid(z1)
        z2 = self.params.U.dot(hs[i])
        ps[i] = softmax(z2)
    J = sum(-log(ps[range(len(ps)), ys]))
    return J
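Example 8 (and the very similar Examples 9 and 12 below) computes the same quantity: run the RNN forward with sigmoid hidden units, softmax the output scores at every step, and sum the per-step cross-entropy. A self-contained sketch with small random parameters (hypothetical sizes, not the class above) makes the recurrence explicit:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(5)
vdim, hdim = 8, 6                 # hypothetical vocab and hidden sizes
L = rng.randn(vdim, hdim) * 0.1   # input word vectors
H = rng.randn(hdim, hdim) * 0.1   # recurrence matrix
U = rng.randn(vdim, hdim) * 0.1   # output matrix

xs = [1, 4, 2, 7]   # input word indices
ys = [4, 2, 7, 3]   # next-word targets

ns = len(xs)
hs = np.zeros((ns + 1, hdim))     # hs[-1] is the all-zero initial state
J = 0.0
for t in range(ns):
    hs[t] = sigmoid(H.dot(hs[t - 1]) + L[xs[t]])
    p = softmax(U.dot(hs[t]))
    J -= np.log(p[ys[t]])

print(J)   # total cross-entropy; exp(J/ns) is the per-word perplexity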
Example 9: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    self.xs = xs
    self.ys = ys
    hs = zeros((ns+1, self.hdim))
    self.hs1 = hs
    # for each time step
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t - 1]) + self.sparams.L[xs[t]])
        y_hat = softmax(dot(self.params.U, hs[t]))
        J -= log(y_hat[ys[t]])
    #### END YOUR CODE ####
    return J
Example 10: _acc_grads
def _acc_grads(self, xs, ys):
    """
    Accumulate gradients, given a pair of training sequences:
    xs = [<indices>] # input words
    ys = [<indices>] # output words (to predict)
    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.H += (your gradient dJ/dH)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # update row
    Per the handout, you should:
    - make predictions by running forward in time
      through the entire input sequence
    - for *each* output word in ys, compute the
      gradients with respect to the cross-entropy
      loss for that output word
    - run backpropagation-through-time for self.bptt
      timesteps, storing grads in self.grads (for H, U)
      and self.sgrads (for L)
    You'll want to store your predictions \hat{y}(t)
    and the hidden layer values h(t) as you run forward,
    so that you can access them during backpropagation.
    At time 0, you should initialize the hidden layer to
    be a vector of zeros.
    """
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = zeros((ns+1, self.hdim))
    # predicted probs
    ps = zeros((ns, self.vdim))
    #### YOUR CODE HERE ####
    # forward propagation
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t]])
        ps[t] = softmax(dot(self.sparams.U, hs[t]))
    # backpropagation through time
    for i in xrange(ns):
        d2i = ps[i]
        d2i[ys[i]] -= 1
        d1 = dot(self.sparams.U.T, d2i) * hs[i] * (1 - hs[i])
        self.sgrads.U = dot(d2i.reshape((-1, 1)), hs[i].reshape((1, -1)))
        for t in xrange(i, i - self.bptt - 1, -1):
            if t >= 0:  # the farthest reference will thus be hs[-1]
                self.sgrads.L[xs[t]] = d1
                self.grads.H += dot(d1.reshape((-1, 1)), hs[t-1].reshape((1, -1)))
                d1 = dot(self.params.H.T, d1) * hs[t-1] * (1 - hs[t-1])  # propagate the error back one timestep
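Example 10 truncates backpropagation-through-time to self.bptt steps. When the truncation window covers the whole sequence, the analytic gradient of the recurrence matrix H is exact and can be confirmed with a finite-difference check. The sketch below is a self-contained toy version under that assumption (hypothetical sizes and parameter names, not the class above):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(6)
vdim, hdim = 6, 4
L = rng.randn(vdim, hdim) * 0.1
U = rng.randn(vdim, hdim) * 0.1
H0 = rng.randn(hdim, hdim) * 0.1
xs, ys = [1, 3, 0, 5], [3, 0, 5, 2]
ns = len(xs)

def forward(H):
    hs = np.zeros((ns + 1, hdim))      # hs[-1] = zero initial state
    ps = np.zeros((ns, vdim))
    J = 0.0
    for t in range(ns):
        hs[t] = sigmoid(H.dot(hs[t - 1]) + L[xs[t]])
        ps[t] = softmax(U.dot(hs[t]))
        J -= np.log(ps[t, ys[t]])
    return J, hs, ps

# analytic gradient of J w.r.t. H, backpropagating every step all the way back
J, hs, ps = forward(H0)
dH = np.zeros_like(H0)
for i in range(ns):
    d2 = ps[i].copy(); d2[ys[i]] -= 1          # softmax + cross-entropy delta
    d1 = U.T.dot(d2) * hs[i] * (1 - hs[i])     # delta at the hidden pre-activation
    for t in range(i, -1, -1):
        dH += np.outer(d1, hs[t - 1])
        d1 = H0.T.dot(d1) * hs[t - 1] * (1 - hs[t - 1])

# finite-difference check on one entry of H
j, k, eps = 1, 2, 1e-5
Hp, Hm = H0.copy(), H0.copy()
Hp[j, k] += eps; Hm[j, k] -= eps
numeric = (forward(Hp)[0] - forward(Hm)[0]) / (2 * eps)
print(np.allclose(dH[j, k], numeric))   # True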
Example 11: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    #
    # hasattr(object, name):
    # The arguments are an object and a string. The result is True if the string is the name of one of
    # the object's attributes, False if not. (This is implemented by calling getattr(object, name) and
    # seeing whether it raises an exception or not.)
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    P = []
    for window in windows:
        x = hstack(self.sparams.L[window])
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    #### END YOUR CODE ####
    return P  # rows are output for each input
Example 12: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    #J = 0
    ns = len(xs)
    #### YOUR CODE HERE ####
    # forward propagation
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))  # predicted probas
    for t in range(0, ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t], :])
        ps[t] = softmax(dot(self.params.U, hs[t]))
    J = -sum(log(ps[arange(ns), ys]))
    #### END YOUR CODE ####
    return J
Example 13: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    # x - (W) -> a - (tanh) -> h - (U) -> z - (softmax) -> p
    P = []
    for window in windows:  # Could this be fully vectorized instead of using a for loop?
        x = hstack(self.sparams.L[window, :])  # same as above
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    #### END YOUR CODE ####
    return array(P)  # rows are output for each input
Example 14: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    P = []
    for window in windows:
        # extract representation: concatenate the window of word vectors into a single numpy column vector
        x = hstack(self.sparams.L[window, :])
        # just two layers, so simple
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    return array(P)  # rows are output for each input
Example 15: compute_loss
def compute_loss(self, windows, labels):
    """
    Compute the loss for a given dataset.
    windows = same as for predict_proba
    labels = list of class labels, for each row of windows
    """
    #### YOUR CODE HERE ####
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
        labels = [labels]
    N = len(windows)
    # Vectorized alternative (commented out):
    # x = self.sparams.L[windows]
    # x = x.reshape((N, x.shape[-2]*x.shape[-1]))
    # z = x.dot(self.params.W.T) + self.params.b1
    # h = tanh(z)
    # z2 = h.dot(self.params.U.T) + self.params.b2
    # p = softmax(z2)
    # J -= sum(log(p[0][labels]))
    # J += (self.lreg / 2.0) * (sum(self.params.W**2.0) + sum(self.params.U**2.0))
    J = 0
    for n in xrange(N):
        x = self.sparams.L[windows[n]]
        x = reshape(x, x.shape[0]*x.shape[1])
        h = tanh(self.params.W.dot(x) + self.params.b1)
        y_hat = softmax(self.params.U.dot(h) + self.params.b2)
        y = make_onehot(labels[n], len(y_hat))
        J -= sum(y*log(y_hat))
    J += (self.lreg / 2.0) * (sum(self.params.W**2.0) + sum(self.params.U**2.0))
    #### END YOUR CODE ####
    return J
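The commented-out block in Example 15 hints at a fully vectorized version of the same loss. Here is a standalone sketch of that idea with hypothetical shapes and random parameters (not the class above), using a row-wise softmax since the whole batch is processed at once:

import numpy as np

def softmax_rows(Z):
    E = np.exp(Z - Z.max(axis=1, keepdims=True))
    return E / E.sum(axis=1, keepdims=True)

rng = np.random.RandomState(7)
V, d, win, hdim, K = 10, 50, 3, 100, 5   # hypothetical sizes
L = rng.randn(V, d) * 0.1
W, b1 = rng.randn(hdim, win * d) * 0.1, np.zeros(hdim)
U, b2 = rng.randn(K, hdim) * 0.1, np.zeros(K)
lreg = 0.001

windows = np.array([[1, 2, 3], [4, 5, 6], [0, 9, 8]])
labels = np.array([0, 3, 1])

X = L[windows].reshape(len(windows), -1)      # (N, win*d) concatenated embeddings
Hid = np.tanh(X.dot(W.T) + b1)                # (N, hdim)
P = softmax_rows(Hid.dot(U.T) + b2)           # (N, K) class probabilities
J = -np.log(P[np.arange(len(labels)), labels]).sum()
J += (lreg / 2.0) * ((W ** 2).sum() + (U ** 2).sum())
print(J)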