This article collects typical usage examples of the sigmoid function from Python's nn.math module. If you have been wondering what nn.math.sigmoid does, how to call it, or what it looks like in real code, the selected examples below should help.
The following shows 15 code examples of the sigmoid function, ordered by popularity by default.
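The examples are drawn from recurrent and window-based neural network language model exercises; the nn.math module itself is not reproduced on this page. As a point of reference, a minimal stand-in based on the standard definitions (an assumption; the actual nn.math implementation may differ in details such as overflow handling) could look like this:

import numpy as np

def sigmoid(x):
    # elementwise logistic function: 1 / (1 + exp(-x))
    return 1.0 / (1.0 + np.exp(-x))

def softmax(x):
    # softmax over the last axis (works for a single score vector or a batch of rows)
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def make_onehot(i, n):
    # length-n one-hot vector with a 1 at position i
    v = np.zeros(n)
    v[i] = 1.0
    return v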
Example 1: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    hs = np.zeros((ns+1, self.hdim))
    for i in range(ns):
        hs[i+1] = sigmoid(self.params.H.dot(hs[i]) + self.params.W.dot(self.sparams.L[xs[i]]))
        nodeCur = self.word2node[ys[i]]
        while nodeCur.parent != None:
            t = 1
            if nodeCur.isLeft == False:
                t = -1
            nodeCur = nodeCur.parent
            J += -np.log(sigmoid(t*nodeCur.hActs.dot(hs[i+1])))
    #### END YOUR CODE ####
    x = self.hierarchicalU.getSumSquareU(self.hierarchicalU.root)
    Jreg = 0.5*self.lreg*(np.sum(self.params.H**2) + np.sum(self.params.W**2) + x)
    return J + Jreg
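Example 1 uses a hierarchical softmax: each internal tree node on the path from the target word's leaf to the root contributes -log sigmoid(t * u.h), with t = +1 for a left branch and t = -1 for a right branch. The sign flip works because sigmoid(-x) = 1 - sigmoid(x), so the right-branch probability is the complement of the left-branch probability. A quick numerical check of that identity (a small sketch, not part of the original code):

import numpy as np

x = np.array([-3.0, 0.5, 2.0])
s = 1.0 / (1.0 + np.exp(-x))           # sigmoid(x), probability of the left branch
s_neg = 1.0 / (1.0 + np.exp(x))        # sigmoid(-x)
assert np.allclose(s_neg, 1.0 - s)     # right-branch probability = 1 - left-branch probability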
Example 2: compute_seq_ppl
def compute_seq_ppl(self, xs, ys):
    #### YOUR CODE HERE ####
    J = 0
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    cs = zeros((ns, self.cdim))
    # predicted probas
    ps = zeros((ns, self.Udim))
    #### YOUR CODE HERE ####
    L = self.sparams.L
    Lc = self.Lcluster
    cfreq = self.cfreq
    cwords = self.cwords
    direct_size = self.hsize
    U = self.params.U
    H = self.params.H
    C = zeros((self.cdim, self.hdim))
    if self.isCompression is True:
        C = self.params.C
    ##
    # Forward propagation
    for i in xrange(ns):
        hs[i+1] = sigmoid(H.dot(hs[i]) + L[xs[i]])
        #hs[i+1] = 2.0/(1 + exp(-2.0*(H.dot(hs[i]) + L[xs[i]]))) - 1

        # class-factored output: locate the target word's cluster and the
        # contiguous range of vocabulary indices belonging to that cluster
        #without maximum entropy optimization
        word_cluster = Lc[ys[i]]
        st_word = cwords[word_cluster, 0]
        ed_word = st_word + cfreq[word_cluster]
        part_cluster = zeros((self.class_size, ))
        part_word = zeros((ed_word - st_word, ))

        # optional maximum-entropy (direct-connection) features
        if self.isME is True:
            if direct_size > 0 and xs[i] != -1:
                part_cluster += self.params.cluster_direct[xs[i]]
                indexs = cwords[word_cluster, 0:int(cfreq[word_cluster])]
                if xs[i] < direct_size:
                    part_word += self.params.word_direct[xs[i], indexs]

        if self.isCompression is True:
            cs[i] = sigmoid(C.dot(hs[i+1]))
            part_cluster += U[self.vdim:].dot(cs[i])
            part_word += U[st_word:ed_word].dot(cs[i])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
        else:
            part_cluster += U[self.vdim:].dot(hs[i+1])
            part_word += U[st_word:ed_word].dot(hs[i+1])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
            #ps[i, self.vdim:] = softmax(U[self.vdim:,:].dot(hs[i+1]))
            #ps[i, st_word:ed_word] = softmax(U[st_word:ed_word,:].dot(hs[i+1]))

        #print maximum(ps[i, ys[st_word:ed_word]]), ps[i,ys[i]], maximum(ps[i, self.vdim:]), ps[i, self.vdim+word_cluster]
        # p(word) = p(class) * p(word | class)
        J -= log(ps[i, ys[i]] * ps[i, self.vdim+word_cluster])
    return J
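Despite its name, compute_seq_ppl in Example 2 returns a summed negative log-likelihood J, built from the class-factored probability p(word) = p(class) * p(word | class). If the per-word perplexity is wanted, it is conventionally obtained by averaging J over the ns predicted words and exponentiating; a small helper, assuming natural-log cross-entropy as above:

import numpy as np

def seq_perplexity(J, ns):
    # J: summed negative log-likelihood (natural log) over ns predicted words
    return np.exp(J / ns)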
Example 3: _acc_grads
def _acc_grads(self, xs, ys):
    #### YOUR CODE HERE ####
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = np.zeros((ns+1, self.hdim))
    # predicted probas
    ps = np.zeros((ns+1, self.vdim))
    #### YOUR CODE HERE ####
    ##
    # Forward propagation
    zs = np.zeros((ns+1, self.hdim))
    for i in range(ns):
        zs[i+1] = self.params.H.dot(hs[i]) + self.params.W.dot(self.sparams.L[xs[i]])
        hs[i+1] = sigmoid(zs[i+1])
    ##
    # Backward propagation through time
    sgradsTmp = np.zeros((self.vdim, self.hdim))
    grad0 = np.zeros((ns+1, self.hdim))  # (y-t)*U
    for i in range(ns):
        nodeCur = self.word2node[ys[i]]
        while nodeCur.parent != None:
            t = 1
            if nodeCur.isLeft == False:
                t = 0
            nodeCur = nodeCur.parent
            if nodeCur.grad == None:
                nodeCur.grad = (sigmoid(nodeCur.hActs.dot(hs[i+1]))-t)*hs[i+1]
            else:
                nodeCur.grad = nodeCur.grad + (sigmoid(nodeCur.hActs.dot(hs[i+1]))-t)*hs[i+1]
            grad0[i+1] = grad0[i+1] + (sigmoid(nodeCur.hActs.dot(hs[i+1]))-t)*nodeCur.hActs
        vectorCurrent = grad0[i+1]*sigmoidGrad(zs[i+1])
        for j in range(min(i+1, self.bptt+1)):
            xh1 = np.ones((self.hdim, self.hdim)).dot(np.diag(hs[i-j]))
            self.grads.H += np.diag(vectorCurrent).dot(xh1)
            x1 = np.ones((self.hdim, self.hdim)).dot(np.diag(self.sparams.L[xs[i-j]]))
            self.grads.W += np.diag(vectorCurrent).dot(x1)
            sgradsTmp[xs[i-j]] += vectorCurrent.dot(self.params.W)
            vectorCurrent = vectorCurrent.dot(self.params.H)
            vectorCurrent = vectorCurrent*sigmoidGrad(zs[i-j])
    self.hierarchicalU.regularizedGrad(self.hierarchicalU.root, self.lreg)
    self.grads.H += self.lreg*self.params.H
    self.grads.W += self.lreg*self.params.W
    for i in range(len(sgradsTmp)):
        self.sgrads.L[i] = sgradsTmp[i, :]
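Example 3 calls a sigmoidGrad helper that is not shown on this page. Assuming it is the usual derivative of the logistic function, sigma'(z) = sigma(z) * (1 - sigma(z)), a compatible definition would be:

import numpy as np

def sigmoidGrad(z):
    # derivative of the logistic sigmoid: sigma(z) * (1 - sigma(z))
    s = 1.0 / (1.0 + np.exp(-z))
    return s * (1.0 - s)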
Example 4: _acc_grads
def _acc_grads(self, xs, ys, d):
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = zeros((ns+1, self.hdim))
    # predicted probas
    ps = zeros((ns, self.vdim))
    zs = zeros((ns+1, self.hdim))
    ##
    # Forward propagation
    d_vec = self.sparams.D[d]
    for t in xrange(ns):
        x_t = xs[t]
        zs[t] = self.params.H.dot(hs[t-1]) + self.sparams.L[x_t] + d_vec
        hs[t] = sigmoid(zs[t])
        ps[t] = softmax(self.params.U.dot(hs[t]) + self.params.G.dot(d_vec.T).reshape(self.vdim,))
    ##
    # Backward propagation through time
    d_grad = zeros_like(self.sparams.D[0])
    for t in reversed(xrange(ns)):
        delta = zeros((ns, self.hdim))
        p_t = ps[t]
        eps_t = p_t - make_onehot(ys[t], len(p_t))
        self.grads.U += outer(eps_t, hs[t])
        self.grads.G += outer(eps_t, d_vec)
        d_grad += self.params.G.T.dot(eps_t)
        sig_prime_t = sigmoid(zs[t])*(1.-sigmoid(zs[t]))
        delta[t] = sig_prime_t * self.params.U.T.dot(eps_t)
        self.sgrads.L[xs[t]] = delta[t].copy()
        d_grad += delta[t].copy()
        self.grads.H += outer(delta[t], hs[t-1])
        for i in xrange(1, self.bptt):
            j = t-i
            if j < 0: continue
            sig_prime_j = sigmoid(zs[j])*(1.-sigmoid(zs[j]))
            delta[j] = sig_prime_j * self.params.H.T.dot(delta[j+1])
            self.sgrads.L[xs[j]] = delta[j].copy()
            d_grad += delta[j].copy()
            self.grads.H += outer(delta[j], hs[j-1])
    self.sgrads.D[d] = d_grad.copy()
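Example 4 recomputes sigmoid(zs[t]) * (1 - sigmoid(zs[t])) during backpropagation even though hs[t] = sigmoid(zs[t]) was already cached in the forward pass; Example 9 below instead reuses the cached activation as hs[t] * (1 - hs[t]). The two are the same quantity, as this small check illustrates (sketch only, not from the original code):

import numpy as np

z = np.random.randn(5)
h = 1.0 / (1.0 + np.exp(-z))                        # cached forward activation sigmoid(z)
deriv_from_cache = h * (1.0 - h)                    # form used in Example 9
deriv_direct = np.exp(-z) / (1.0 + np.exp(-z))**2   # sigma'(z) written out directly
assert np.allclose(deriv_from_cache, deriv_direct)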
Example 5: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    # predicted probas
    ps = zeros((ns, self.vdim))
    #### YOUR CODE HERE ####
    L = self.sparams.L
    U = self.params.U
    H = self.params.H
    ##
    # Forward propagation
    for i in xrange(ns):
        hs[i+1] = sigmoid(H.dot(hs[i]) + L[xs[i]])
        #hs[i+1] = 2.0/(1.0 + exp(-2.0*(H.dot(hs[i]) + L[xs[i]]))) - 1.0
        ps[i] = softmax(U.dot(hs[i+1]))
        J -= log(ps[i][ys[i]])
    #### END YOUR CODE ####
    return J
Example 6: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))
    for i in xrange(ns):
        hs[i] = sigmoid(self.params.H.dot(hs[i-1]) + self.sparams.L[xs[i]])
        ps[i] = softmax(self.params.U.dot(hs[i]))
        J -= log(ps[i][ys[i]])
    #### END YOUR CODE ####
    return J
Example 7: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = zeros((ns+1, self.hdim))
    # _for memory purposes_, we do not compute the loss in one fell swoop
    # forward propagation
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t]])
        p = softmax(dot(self.sparams.U, hs[t]))
        J -= sum(log(p[ys[t]]))
    #### END YOUR CODE ####
    return J
Example 8: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    self.xs = xs
    self.ys = ys
    hs = zeros((ns+1, self.hdim))
    self.hs1 = hs
    # for each time step
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t - 1]) + self.sparams.L[xs[t]])
        y_hat = softmax(dot(self.params.U, hs[t]))
        J -= log(y_hat[ys[t]])
    #### END YOUR CODE ####
    return J
Example 9: _acc_grads
def _acc_grads(self, xs, ys):
    """
    Accumulate gradients, given a pair of training sequences:
    xs = [<indices>] # input words
    ys = [<indices>] # output words (to predict)

    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.H += (your gradient dJ/dH)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # update row

    Per the handout, you should:
      - make predictions by running forward in time
        through the entire input sequence
      - for *each* output word in ys, compute the
        gradients with respect to the cross-entropy
        loss for that output word
      - run backpropagation-through-time for self.bptt
        timesteps, storing grads in self.grads (for H, U)
        and self.sgrads (for L)

    You'll want to store your predictions \hat{y}(t)
    and the hidden layer values h(t) as you run forward,
    so that you can access them during backpropagation.

    At time 0, you should initialize the hidden layer to
    be a vector of zeros.
    """
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = zeros((ns+1, self.hdim))
    # predicted probs
    ps = zeros((ns, self.vdim))
    #### YOUR CODE HERE ####
    # forward propagation
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t]])
        ps[t] = softmax(dot(self.sparams.U, hs[t]))
    # backpropagation through time
    for i in xrange(ns):
        d2i = ps[i]
        d2i[ys[i]] -= 1
        d1 = dot(self.sparams.U.T, d2i) * hs[i] * (1 - hs[i])
        self.sgrads.U = dot(d2i.reshape((-1, 1)), hs[i].reshape((1, -1)))
        for t in xrange(i, i - self.bptt - 1, -1):
            if t >= 0:  # the farthest reference will thus be hs[-1]
                self.sgrads.L[xs[t]] = d1
                self.grads.H += dot(d1.reshape((-1, 1)), hs[t-1].reshape((1, -1)))
                d1 = dot(self.params.H.T, d1) * hs[t-1] * (1 - hs[t-1])  # accumulate punishments/deltas
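The output-layer error in Example 9 is formed in place with d2i[ys[i]] -= 1, which is the same quantity Example 4 writes explicitly as ps[t] - make_onehot(ys[t], len(ps[t])): the softmax output minus a one-hot target. A small check of that equivalence (illustrative sketch only):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

p = softmax(np.random.randn(4))
y = 2
a = p.copy()
a[y] -= 1                   # in-place form used in Example 9
b = p - np.eye(4)[y]        # explicit "prediction minus one-hot" form of Example 4
assert np.allclose(a, b)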
Example 10: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.

    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.

    windows = array (n x windowsize),
    each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    # construct input matrix
    x = vstack([concatenate([self.sparams.L[idx] for idx in window]) for window in windows])
    z1 = self.params.W.dot(x.T) + self.params.b1[:, newaxis]
    h1 = 2 * sigmoid(2 * z1) - 1
    z2 = self.params.U.dot(h1) + self.params.b2[:, newaxis]
    P = softmax(z2.T)
    #### END YOUR CODE ####
    return P  # rows are output for each input
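Example 10 (and the commented-out lines in Examples 2 and 5) builds a tanh hidden layer out of the sigmoid helper via the identity tanh(z) = 2*sigmoid(2z) - 1. A quick numerical confirmation (sketch, not part of the original code):

import numpy as np

z = np.linspace(-3.0, 3.0, 7)
h = 2.0 / (1.0 + np.exp(-2.0 * z)) - 1.0   # 2*sigmoid(2z) - 1
assert np.allclose(h, np.tanh(z))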
Example 11: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    #J = 0
    ns = len(xs)
    #### YOUR CODE HERE ####
    # forward propagation
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))  # predicted probas
    for t in range(0, ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t], :])
        ps[t] = softmax(dot(self.params.U, hs[t]))
    J = -sum(log(ps[arange(ns), ys]))
    #### END YOUR CODE ####
    return J
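Example 11 replaces the per-timestep loop over -log(ps[t][ys[t]]) with a single fancy-indexing expression, ps[arange(ns), ys], which picks out the predicted probability of the correct word at every timestep at once. The two formulations agree, as this small check illustrates (sketch only):

import numpy as np

ps = np.random.rand(3, 5)
ps /= ps.sum(axis=1, keepdims=True)          # three rows of fake predicted probabilities
ys = [1, 4, 0]
loop = -sum(np.log(ps[t][ys[t]]) for t in range(3))
vectorized = -np.sum(np.log(ps[np.arange(3), ys]))
assert np.allclose(loop, vectorized)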
Example 12: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))
    for i in range(ns):
        z1 = self.params.H.dot(hs[i-1]) + self.sparams.L[xs[i]]
        hs[i] = sigmoid(z1)
        z2 = self.params.U.dot(hs[i])
        ps[i] = softmax(z2)
    J = sum(-log(ps[range(len(ps)), ys]))
    return J
Example 13: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    ns = len(xs)
    h_ant = zeros((1, self.hdim))
    J = 0
    #### YOUR CODE HERE ####
    for step in xrange(0, ns):
        # print "hs[step-1].shape %s" % (hs[step-1].shape,)
        # print "self.params.H.shape %s" % (self.params.H.shape,)
        # print "self.sparams.L.shape %s" % (self.sparams.L.shape,)
        # print "self.sparams.L[xs[step]].shape %s" % (self.sparams.L[xs[step]].shape,)
        a1 = self.params.H.dot(h_ant.T).T + self.sparams.L[xs[step]]
        h = sigmoid(a1)
        a2 = self.params.U.dot(h.T).T
        # print "h.shape %s" % (h.shape,)
        # print "a2.shape %s" % (a2.shape,)
        # print "self.params.U.shape %s" % (self.params.U.shape,)
        y_hat = softmax(a2)
        h_ant = h
        J -= log(y_hat[:, ys[step]])
    #### END YOUR CODE ####
    return J
Example 14: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    h_prev = zeros(self.hdim)
    for t in xrange(ns):
        h_t = sigmoid(dot(self.params.H, h_prev) + self.sparams.L[xs[t]])
        if t == ns - 1:
            yhat_t = softmax(dot(self.params.U, h_t))
            J = -log(yhat_t[ys])
        h_prev = h_t
    J += .5 * self.lamb * (sum(self.params.H**2) + sum(self.params.U**2))
    #### END YOUR CODE ####
    return J
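Example 14 adds an L2 penalty 0.5 * lamb * (||H||^2 + ||U||^2) to the loss; the matching gradient contribution is simply lamb * H and lamb * U, which is what Example 3 adds with its lreg terms. A finite-difference spot check of that gradient (illustrative sketch):

import numpy as np

lamb = 0.001
H = np.random.randn(3, 3)
analytic = lamb * H                    # d/dH of 0.5 * lamb * ||H||^2
eps = 1e-6
i, j = 1, 2
Hp = H.copy(); Hp[i, j] += eps
Hm = H.copy(); Hm[i, j] -= eps
numeric = 0.5 * lamb * (np.sum(Hp**2) - np.sum(Hm**2)) / (2 * eps)
assert np.allclose(analytic[i, j], numeric)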
Example 15: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.

    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))  # e.g. (3, 10)
    # Forward propagation
    for t in xrange(ns):
        hs[t] = sigmoid(self.params.H.dot(hs[t - 1]) + self.sparams.L[xs[t]])  # (Dh,Dh)*(Dh,) + (Dh,)
        ps[t] = softmax(self.params.U.dot(hs[t]))  # (V,Dh)*(Dh,)
        J += -log(ps[t][ys[t]])
        #print ps[t]
        #print [ys[t]]
        #J += -ys[t]*log(ps[t])
    #### END YOUR CODE ####
    return J
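Putting the compute_seq_loss examples to use: given a model object exposing a method like the ones above (and assuming the returned value is a plain summed cross-entropy, with no regularization term), corpus-level perplexity can be computed by averaging the loss over all predicted words. A hypothetical driver:

import numpy as np

def corpus_perplexity(model, X, Y):
    # X, Y: lists of index sequences; model.compute_seq_loss returns summed -log p
    total_loss = sum(model.compute_seq_loss(xs, ys) for xs, ys in zip(X, Y))
    total_words = sum(len(ys) for ys in Y)
    return np.exp(total_loss / total_words)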