This article collects typical usage examples of the softmax function from Python's nn.math module. If you have been wondering what exactly the softmax function does, how to call it, or what it looks like in real code, the hand-picked examples below should help.
The article presents 15 code examples of the softmax function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
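Every example below calls softmax on a 1-D vector of scores and gets back a probability vector of the same length. As a point of reference, here is a minimal, self-contained sketch of such a function, using the usual max-subtraction trick for numerical stability. This is an assumption about how nn.math.softmax behaves based on how the examples use it, not the library's actual code.

import numpy as np

def softmax(x):
    """Softmax over the last axis of a 1-D or 2-D score array.

    Subtracting the maximum before exponentiating avoids overflow
    and does not change the result.
    """
    x = np.asarray(x, dtype=float)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

if __name__ == "__main__":
    p = softmax(np.array([2.0, 1.0, 0.1]))
    print(p)        # approximately [0.659 0.242 0.099]
    print(p.sum())  # 1.0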
Example 1: compute_seq_ppl
def compute_seq_ppl(self, xs, ys):
    #### YOUR CODE HERE ####
    J = 0
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    cs = zeros((ns, self.cdim))
    # predicted probas
    ps = zeros((ns, self.Udim))
    #### YOUR CODE HERE ####
    L = self.sparams.L
    Lc = self.Lcluster
    cfreq = self.cfreq
    cwords = self.cwords
    direct_size = self.hsize
    U = self.params.U
    H = self.params.H
    C = zeros((self.cdim, self.hdim))
    if self.isCompression is True:
        C = self.params.C
    ##
    # Forward propagation
    for i in xrange(ns):
        hs[i+1] = sigmoid(H.dot(hs[i]) + L[xs[i]])
        #hs[i+1] = 2.0/(1 + exp(-2.0*(H.dot(hs[i]) + L[xs[i]]))) - 1
        # without maximum entropy optimization
        word_cluster = Lc[ys[i]]
        st_word = cwords[word_cluster, 0]
        ed_word = st_word + cfreq[word_cluster]
        part_cluster = zeros((self.class_size, ))
        part_word = zeros((ed_word - st_word, ))
        if self.isME is True:
            if direct_size > 0 and xs[i] != -1:
                part_cluster += self.params.cluster_direct[xs[i]]
                indexs = cwords[word_cluster, 0:int(cfreq[word_cluster])]
                if xs[i] < direct_size:
                    part_word += self.params.word_direct[xs[i], indexs]
        if self.isCompression is True:
            cs[i] = sigmoid(C.dot(hs[i+1]))
            part_cluster += U[self.vdim:].dot(cs[i])
            part_word += U[st_word:ed_word].dot(cs[i])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
        else:
            part_cluster += U[self.vdim:].dot(hs[i+1])
            part_word += U[st_word:ed_word].dot(hs[i+1])
            ps[i, self.vdim:] = softmax(part_cluster)
            ps[i, st_word:ed_word] = softmax(part_word)
            #ps[i, self.vdim:] = softmax(U[self.vdim:,:].dot(hs[i+1]))
            #ps[i, st_word:ed_word] = softmax(U[st_word:ed_word,:].dot(hs[i+1]))
        #print maximum(ps[i, ys[st_word:ed_word]]), ps[i,ys[i]], maximum(ps[i, self.vdim:]), ps[i, self.vdim+word_cluster]
        J -= log(ps[i, ys[i]] * ps[i, self.vdim+word_cluster])
    return J
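Example 1 uses a two-level (class-factored) softmax: one softmax over word clusters and one softmax over the words inside the target word's cluster, with the word probability taken as the product of the two, exactly as in the final J update. The sketch below isolates that factorization with made-up shapes and variable names; it is not the class internals above.

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

# hypothetical sizes: 3 clusters, 9 words, hidden dimension 4
rng = np.random.RandomState(0)
h = rng.randn(4)                             # hidden state at one timestep
U_cluster = rng.randn(3, 4)                  # cluster score matrix
U_word = rng.randn(9, 4)                     # word score matrix
word_to_cluster = np.repeat([0, 1, 2], 3)    # words 0-2 -> cluster 0, etc.

target_word = 5
c = word_to_cluster[target_word]                 # the target word's cluster
in_cluster = np.where(word_to_cluster == c)[0]   # words sharing that cluster

p_cluster = softmax(U_cluster.dot(h))            # P(cluster | h)
p_word = softmax(U_word[in_cluster].dot(h))      # P(word | cluster, h)

# P(word) = P(cluster) * P(word | cluster), as in the J update above
prob = p_cluster[c] * p_word[np.where(in_cluster == target_word)[0][0]]
print(prob)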
Example 2: forwardProp
def forwardProp(self, node, correct=[], guess=[]):
    cost = total = 0.0
    # this is exactly the same setup as forwardProp in rnn.py
    if node.isLeaf == True:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1
    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        node.hActs1 = self.ReLU(self.W1.dot(h) + self.b1)
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
Example 3: forwardProp
def forwardProp(self, node, correct, guess):
    cost = total = 0.0
    if node.isLeaf == True:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1
    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        tmp = np.zeros(len(node.left.hActs1))
        for i in range(len(tmp)):
            tmp[i] = h.dot(self.V[i]).dot(h)
        node.hActs1 = self.ReLU(self.W.dot(h) + self.b + tmp)
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
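The inner loop in Example 3 evaluates a bilinear (tensor) term, tmp[i] = h·V[i]·h, one output dimension at a time. Assuming V is a 3-D array of shape (d, 2d, 2d), the same quantity can be computed in one call to np.einsum; a small sketch:

import numpy as np

rng = np.random.RandomState(1)
d = 5
V = rng.randn(d, 2 * d, 2 * d)   # one (2d x 2d) matrix per output dimension
h = rng.randn(2 * d)             # concatenation of the two child vectors

# loop version, as in the example above
tmp_loop = np.array([h.dot(V[i]).dot(h) for i in range(d)])

# vectorized version: tmp[i] = sum_{j,k} h[j] * V[i,j,k] * h[k]
tmp_vec = np.einsum('j,ijk,k->i', h, V, h)

print(np.allclose(tmp_loop, tmp_vec))   # True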
Example 4: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    #print 'windows.shape', windows[0]
    P = []
    for window in windows:
        x = hstack([self.sparams.L[idx] for idx in window])  # extract representation, a (150,) vector
        #x = reshape(x, (x.shape[0]*x.shape[1]))
        #print self.params.W.shape, ' ', x.shape, ' ', self.params.b1.shape
        a = self.params.W.dot(x) + self.params.b1   # (100,150)*(150,) + (100,) => (100,)
        h = tanh(a)                                 # (100,)
        p = softmax(self.params.U.dot(h) + self.params.b2)  # (5,100)*(100,) + (5,) => (5,)
        P.append(p)
    #### END YOUR CODE ####
    return P  # rows are output for each input
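Example 4 concatenates the window's word vectors, applies a tanh hidden layer, and softmaxes the output scores. The following standalone sketch reproduces that embed -> tanh -> softmax pipeline with hypothetical shapes and random parameters (it is not the class above), including the singleton-handling step:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

# hypothetical sizes: vocab 10, embedding dim 50, window 3, hidden 100, 5 classes
rng = np.random.RandomState(2)
L = rng.randn(10, 50)                           # word-vector lookup table
W, b1 = rng.randn(100, 150), np.zeros(100)
U, b2 = rng.randn(5, 100), np.zeros(5)

def predict_proba(windows):
    if not hasattr(windows[0], "__iter__"):     # singleton -> list-of-lists
        windows = [windows]
    P = []
    for window in windows:
        x = np.hstack([L[idx] for idx in window])   # (150,)
        h = np.tanh(W.dot(x) + b1)                  # (100,)
        P.append(softmax(U.dot(h) + b2))            # (5,)
    return np.array(P)

print(predict_proba([3, 1, 4]).shape)                # (1, 5) -- single window
print(predict_proba([[3, 1, 4], [0, 2, 9]]).shape)   # (2, 5)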
Example 5: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    # hs[-1] = initial hidden state (zeros)
    ns = len(ys)
    hs = zeros((ns+1, self.hdim))
    for t in range(ns):
        hs[t] = sigmoid(self.params.H.dot(hs[t-1]) + self.sparams.L[xs[t]])
        #ps[t] = softmax(self.params.U.dot(hs[t]))
        #J -= log(ps[t][ys[t]])
    h_final = hs[ns-1]
    z = self.params.U.dot(h_final)
    y_hat = []
    for i in range(n_aspect):
        current = z[sent_dim*i:sent_dim*(i+1)]
        y_hat.extend(softmax(current))
    J = -sum(ys.reshape(len(ys), 1) * log(array(y_hat).reshape(len(y_hat), 1)))
    #### END YOUR CODE ####
    return J
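Example 5 applies a separate softmax to each contiguous block of the output vector (one block per aspect), rather than one softmax over the whole vector. Assuming hypothetical values for n_aspect and sent_dim, the block-wise step looks like this in isolation:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

n_aspect, sent_dim = 3, 4     # hypothetical: 3 aspects, 4 sentiment classes each
z = np.random.RandomState(3).randn(n_aspect * sent_dim)

y_hat = np.concatenate([softmax(z[sent_dim * i : sent_dim * (i + 1)])
                        for i in range(n_aspect)])

# each 4-way block sums to 1 on its own
print(y_hat.reshape(n_aspect, sent_dim).sum(axis=1))   # [1. 1. 1.]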
Example 6: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format
    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class
    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.U += (your gradient dJ/dU)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    xf = []
    for idx in window:
        xf.extend(self.sparams.L[idx])  # extract representation
    tanhX = tanh(self.params.W.dot(xf) + self.params.b1)
    softmaxP = softmax(self.params.U.dot(tanhX) + self.params.b2)
    y = make_onehot(label, len(softmaxP))
    delta2 = softmaxP - y
    self.grads.U += outer(delta2, tanhX) + self.lreg * self.params.U
    self.grads.b2 += delta2
    delta1 = self.params.U.T.dot(delta2) * (1. - tanhX*tanhX)
    self.grads.W += outer(delta1, xf) + self.lreg * self.params.W
    self.grads.b1 += delta1
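Example 6 relies on the standard identity that, for a softmax output followed by cross-entropy loss, the gradient with respect to the pre-softmax scores is simply p - y; that is what delta2 computes. A quick numerical check of that identity, independent of the class above:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

rng = np.random.RandomState(4)
z = rng.randn(5)          # pre-softmax scores (logits)
label = 2
y = np.eye(5)[label]      # one-hot target

loss = lambda z: -np.log(softmax(z)[label])
analytic = softmax(z) - y                      # the "delta2" of the example

eps = 1e-6
numeric = np.array([(loss(z + eps * np.eye(5)[k]) - loss(z - eps * np.eye(5)[k])) / (2 * eps)
                    for k in range(5)])

print(np.allclose(analytic, numeric, atol=1e-6))   # True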
Example 7: f_prop
def f_prop(self, ys, h_in):
    """Given a series of xs and a series of ys, returns hidden vector at
    end, and also the cost"""
    N = len(ys)  # total num timesteps
    #L = self.params['L']
    Wh = self.params['Wh']
    #Wx = self.params['Wx']
    U = self.params['U']
    b1 = self.params['b1']
    b2 = self.params['b2']
    self.yhats = np.zeros([self.outdim, N])
    self.hs = np.zeros([self.hdim, N+1])
    # np.random.seed(2234)
    # self.hs[:,-1] = np.random.normal(0, .1, (self.hdim))
    self.hs[:,-1] = h_in
    cost = 0
    for t in xrange(N):
        h_prev = self.hs[:,t-1]
        z_1 = np.dot(Wh, h_prev) + b1  #+ np.dot(Wx, Lx)
        h1 = np.maximum(z_1, 0)
        self.hs[:,t] = h1
        yhat = softmax(np.dot(U, h1) + b2)
        self.yhats[:,t] = yhat
        cost += -np.log(yhat[ys[t]])
    return cost
Example 8: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    ns = len(xs)
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))
    for i in range(ns):
        z1 = self.params.H.dot(hs[i-1]) + self.sparams.L[xs[i]]
        hs[i] = sigmoid(z1)
        z2 = self.params.U.dot(hs[i])
        ps[i] = softmax(z2)
    J = sum(-log(ps[range(len(ps)), ys]))
    return J
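Example 8 (and the very similar Examples 9 and 12 below) computes the same quantity: run the RNN forward with sigmoid hidden units, softmax the output scores at every step, and sum the per-step cross-entropy. A self-contained sketch with small random parameters (hypothetical sizes, not the class above) makes the recurrence explicit:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(5)
vdim, hdim = 8, 6                 # hypothetical vocab and hidden sizes
L = rng.randn(vdim, hdim) * 0.1   # input word vectors
H = rng.randn(hdim, hdim) * 0.1   # recurrence matrix
U = rng.randn(vdim, hdim) * 0.1   # output matrix

xs = [1, 4, 2, 7]   # input word indices
ys = [4, 2, 7, 3]   # next-word targets

ns = len(xs)
hs = np.zeros((ns + 1, hdim))     # hs[-1] is the all-zero initial state
J = 0.0
for t in range(ns):
    hs[t] = sigmoid(H.dot(hs[t - 1]) + L[xs[t]])
    p = softmax(U.dot(hs[t]))
    J -= np.log(p[ys[t]])

print(J)   # total cross-entropy; exp(J/ns) is the per-word perplexity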
Example 9: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    J = 0
    #### YOUR CODE HERE ####
    ns = len(xs)
    self.xs = xs
    self.ys = ys
    hs = zeros((ns+1, self.hdim))
    self.hs1 = hs
    # for each time step
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t - 1]) + self.sparams.L[xs[t]])
        y_hat = softmax(dot(self.params.U, hs[t]))
        J -= log(y_hat[ys[t]])
    #### END YOUR CODE ####
    return J
Example 10: _acc_grads
def _acc_grads(self, xs, ys):
    """
    Accumulate gradients, given a pair of training sequences:
    xs = [<indices>] # input words
    ys = [<indices>] # output words (to predict)
    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.
    So, for example:
    self.grads.H += (your gradient dJ/dH)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # update row
    Per the handout, you should:
    - make predictions by running forward in time
      through the entire input sequence
    - for *each* output word in ys, compute the
      gradients with respect to the cross-entropy
      loss for that output word
    - run backpropagation-through-time for self.bptt
      timesteps, storing grads in self.grads (for H, U)
      and self.sgrads (for L)
    You'll want to store your predictions \hat{y}(t)
    and the hidden layer values h(t) as you run forward,
    so that you can access them during backpropagation.
    At time 0, you should initialize the hidden layer to
    be a vector of zeros.
    """
    # Expect xs as list of indices
    ns = len(xs)
    # make matrix here of corresponding h(t)
    # hs[-1] = initial hidden state (zeros)
    hs = zeros((ns+1, self.hdim))
    # predicted probs
    ps = zeros((ns, self.vdim))
    #### YOUR CODE HERE ####
    # forward propagation
    for t in xrange(ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t]])
        ps[t] = softmax(dot(self.sparams.U, hs[t]))
    # backpropagation through time
    for i in xrange(ns):
        d2i = ps[i]
        d2i[ys[i]] -= 1
        d1 = dot(self.sparams.U.T, d2i) * hs[i] * (1 - hs[i])
        self.sgrads.U = dot(d2i.reshape((-1, 1)), hs[i].reshape((1, -1)))
        for t in xrange(i, i - self.bptt - 1, -1):
            if t >= 0:  # the farthest reference will thus be hs[-1]
                self.sgrads.L[xs[t]] = d1
                self.grads.H += dot(d1.reshape((-1, 1)), hs[t-1].reshape((1, -1)))
                d1 = dot(self.params.H.T, d1) * hs[t-1] * (1 - hs[t-1])  # propagate the error back one timestep
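Example 10 truncates backpropagation-through-time to self.bptt steps. When the truncation window covers the whole sequence, the analytic gradient of the recurrence matrix H is exact and can be confirmed with a finite-difference check. The sketch below is a self-contained toy version under that assumption (hypothetical sizes and parameter names, not the class above):

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(6)
vdim, hdim = 6, 4
L = rng.randn(vdim, hdim) * 0.1
U = rng.randn(vdim, hdim) * 0.1
H0 = rng.randn(hdim, hdim) * 0.1
xs, ys = [1, 3, 0, 5], [3, 0, 5, 2]
ns = len(xs)

def forward(H):
    hs = np.zeros((ns + 1, hdim))      # hs[-1] = zero initial state
    ps = np.zeros((ns, vdim))
    J = 0.0
    for t in range(ns):
        hs[t] = sigmoid(H.dot(hs[t - 1]) + L[xs[t]])
        ps[t] = softmax(U.dot(hs[t]))
        J -= np.log(ps[t, ys[t]])
    return J, hs, ps

# analytic gradient of J w.r.t. H, backpropagating every step all the way back
J, hs, ps = forward(H0)
dH = np.zeros_like(H0)
for i in range(ns):
    d2 = ps[i].copy(); d2[ys[i]] -= 1          # softmax + cross-entropy delta
    d1 = U.T.dot(d2) * hs[i] * (1 - hs[i])     # delta at the hidden pre-activation
    for t in range(i, -1, -1):
        dH += np.outer(d1, hs[t - 1])
        d1 = H0.T.dot(d1) * hs[t - 1] * (1 - hs[t - 1])

# finite-difference check on one entry of H
j, k, eps = 1, 2, 1e-5
Hp, Hm = H0.copy(), H0.copy()
Hp[j, k] += eps; Hm[j, k] -= eps
numeric = (forward(Hp)[0] - forward(Hm)[0]) / (2 * eps)
print(np.allclose(dH[j, k], numeric))   # True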
Example 11: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    #
    # hasattr(object, name):
    # The arguments are an object and a string. The result is True if the string is the name of one of
    # the object's attributes, False if not. (This is implemented by calling getattr(object, name) and
    # seeing whether it raises an exception or not.)
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    P = []
    for window in windows:
        x = hstack(self.sparams.L[window])
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    #### END YOUR CODE ####
    return P  # rows are output for each input
Example 12: compute_seq_loss
def compute_seq_loss(self, xs, ys):
    """
    Compute the total cross-entropy loss
    for an input sequence xs and output
    sequence (labels) ys.
    You should run the RNN forward,
    compute cross-entropy loss at each timestep,
    and return the sum of the point losses.
    """
    #J = 0
    ns = len(xs)
    #### YOUR CODE HERE ####
    # forward propagation
    hs = zeros((ns+1, self.hdim))
    ps = zeros((ns, self.vdim))  # predicted probas
    for t in range(0, ns):
        hs[t] = sigmoid(dot(self.params.H, hs[t-1]) + self.sparams.L[xs[t], :])
        ps[t] = softmax(dot(self.params.U, hs[t]))
    J = -sum(log(ps[arange(ns), ys]))
    #### END YOUR CODE ####
    return J
Example 13: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    #### YOUR CODE HERE ####
    # x - (W) -> a - (tanh) -> h - (U) -> z - (softmax) -> p
    P = []
    for window in windows:  # Could this be fully vectorized instead of using a for loop?
        x = hstack(self.sparams.L[window, :])  # same as above
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    #### END YOUR CODE ####
    return array(P)  # rows are output for each input
Example 14: predict_proba
def predict_proba(self, windows):
    """
    Predict class probabilities.
    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.
    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
    P = []
    for window in windows:
        # extract representation: concatenate the window of word vectors into a single numpy column vector
        x = hstack(self.sparams.L[window, :])
        # just two layers, so simple
        h = tanh(self.params.W.dot(x) + self.params.b1)
        p = softmax(self.params.U.dot(h) + self.params.b2)
        P.append(p)
    return array(P)  # rows are output for each input
Example 15: compute_loss
def compute_loss(self, windows, labels):
    """
    Compute the loss for a given dataset.
    windows = same as for predict_proba
    labels = list of class labels, for each row of windows
    """
    #### YOUR CODE HERE ####
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
        labels = [labels]
    N = len(windows)
    # Vectorized alternative (commented out):
    # x = self.sparams.L[windows]
    # x = x.reshape((N, x.shape[-2]*x.shape[-1]))
    # z = x.dot(self.params.W.T) + self.params.b1
    # h = tanh(z)
    # z2 = h.dot(self.params.U.T) + self.params.b2
    # p = softmax(z2)
    # J -= sum(log(p[0][labels]))
    # J += (self.lreg / 2.0) * (sum(self.params.W**2.0) + sum(self.params.U**2.0))
    J = 0
    for n in xrange(N):
        x = self.sparams.L[windows[n]]
        x = reshape(x, x.shape[0]*x.shape[1])
        h = tanh(self.params.W.dot(x) + self.params.b1)
        y_hat = softmax(self.params.U.dot(h) + self.params.b2)
        y = make_onehot(labels[n], len(y_hat))
        J -= sum(y*log(y_hat))
    J += (self.lreg / 2.0) * (sum(self.params.W**2.0) + sum(self.params.U**2.0))
    #### END YOUR CODE ####
    return J
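The commented-out block in Example 15 hints at a fully vectorized version of the same loss. Here is a standalone sketch of that idea with hypothetical shapes and random parameters (not the class above), using a row-wise softmax since the whole batch is processed at once:

import numpy as np

def softmax_rows(Z):
    E = np.exp(Z - Z.max(axis=1, keepdims=True))
    return E / E.sum(axis=1, keepdims=True)

rng = np.random.RandomState(7)
V, d, win, hdim, K = 10, 50, 3, 100, 5   # hypothetical sizes
L = rng.randn(V, d) * 0.1
W, b1 = rng.randn(hdim, win * d) * 0.1, np.zeros(hdim)
U, b2 = rng.randn(K, hdim) * 0.1, np.zeros(K)
lreg = 0.001

windows = np.array([[1, 2, 3], [4, 5, 6], [0, 9, 8]])
labels = np.array([0, 3, 1])

X = L[windows].reshape(len(windows), -1)      # (N, win*d) concatenated embeddings
Hid = np.tanh(X.dot(W.T) + b1)                # (N, hdim)
P = softmax_rows(Hid.dot(U.T) + b2)           # (N, K) class probabilities
J = -np.log(P[np.arange(len(labels)), labels]).sum()
J += (lreg / 2.0) * ((W ** 2).sum() + (U ** 2).sum())
print(J)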