This page collects typical usage examples of the Python function q1_softmax.softmax. If you have been wondering what softmax does, how to call it, or what real uses of it look like, the curated examples below should help.
Fifteen code examples of the softmax function are shown, ordered by popularity by default.
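All fifteen examples call the same function. For orientation, here is a minimal sketch of what a numerically stable, row-wise q1_softmax.softmax typically looks like in the CS224d/CS224n assignments; treat it as an assumption, since the assignment's actual implementation is not shown on this page.

import numpy as np

def softmax(x):
    """Row-wise softmax, stabilized by subtracting the per-row maximum."""
    orig_shape = x.shape
    if x.ndim > 1:
        x = x - np.max(x, axis=1, keepdims=True)  # shift-invariance (see Example 1)
        x = np.exp(x)
        x /= np.sum(x, axis=1, keepdims=True)
    else:
        x = x - np.max(x)
        x = np.exp(x)
        x /= np.sum(x)
    assert x.shape == orig_shape
    return x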
Example 1: test_softmax_linearity_rowwise
def test_softmax_linearity_rowwise(dim_1, dim_2):
    shift = np.random.uniform(low=-100, high=100, size=(dim_1, 1))
    a1 = np.random.normal(size=(dim_1, dim_2))
    a2 = a1 + shift
    assert rel_error(np.max(a2 - a1), np.max(shift)) < 1e-8
    assert rel_error(softmax(a1), softmax(a2)) < 1e-8
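Examples 1 and 2 compare arrays with a rel_error helper that is not defined on this page. A common definition, assumed here for completeness:

import numpy as np

def rel_error(x, y):
    """Maximum relative error between x and y, guarded against division by zero."""
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))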
Example 2: test_softmax_permutation_axis1
def test_softmax_permutation_axis1(dim_1):
    a1 = np.random.normal(size=(1, dim_1))
    s1 = softmax(a1)
    permutation = np.random.permutation(dim_1)
    inverse_permutation = np.argsort(permutation)
    s1_perm = softmax(a1.ravel()[permutation])
    assert rel_error(s1_perm.ravel()[inverse_permutation], s1) <= 1e-8
Example 3: add_model
def add_model(self, input_data):
    """Adds a linear-layer plus a softmax transformation
    The core transformation for this model which transforms a batch of input
    data into a batch of predictions. In this case, the mathematical
    transformation effected is
    y = softmax(xW + b)
    Hint: Make sure to create tf.Variables as needed. Also, make sure to use
    tf.name_scope to ensure that your name spaces are clean.
    Hint: For this simple use-case, it's sufficient to initialize both weights W
    and biases b with zeros.
    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    ### YOUR CODE HERE
    # tf.zeros already yields float32; note that tf.Variable's second
    # positional argument is `trainable`, not a dtype.
    self.W = tf.Variable(tf.zeros([self.config.n_features, self.config.n_classes]),
                         name="weight")
    # The bias broadcasts across the batch, so shape (n_classes,) suffices.
    self.b = tf.Variable(tf.zeros([self.config.n_classes]), name="bias")
    out = softmax(tf.matmul(input_data, self.W) + self.b)
    ### END YOUR CODE
    return out
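A hedged sketch of how an add_model like the one above is typically driven in a TensorFlow 1.x session. The placeholder, the shape values, and the use of tf.nn.softmax in place of the assignment's own softmax are all assumptions for illustration:

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API assumed

n_features, n_classes = 100, 5  # assumed config values
x = tf.placeholder(tf.float32, shape=(None, n_features))
W = tf.Variable(tf.zeros([n_features, n_classes]), name="weight")
b = tf.Variable(tf.zeros([n_classes]), name="bias")
out = tf.nn.softmax(tf.matmul(x, W) + b)  # stand-in for the assignment's softmax

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(out, feed_dict={x: np.zeros((4, n_features))})
    print(probs.shape)  # (4, 5); each row sums to 1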
Example 4: add_model
def add_model(self, input_data):
    """Adds a linear-layer plus a softmax transformation
    The core transformation for this model which transforms a batch of input
    data into a batch of predictions. In this case, the mathematical
    transformation effected is
    y = softmax(xW + b)
    Hint: Make sure to create tf.Variables as needed. Also, make sure to use
    tf.name_scope to ensure that your name spaces are clean.
    Hint: For this simple use-case, it's sufficient to initialize both weights W
    and biases b with zeros.
    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    # Create the variables.
    self.w = tf.Variable(tf.zeros([self.config.n_features, self.config.n_classes]), name="w")
    self.b = tf.Variable(tf.zeros([self.config.n_classes]), name="b")
    out = softmax(tf.matmul(input_data, self.w) + self.b)
    return out
Example 5: softmaxCostAndGradient
def softmaxCostAndGradient(predicted, target, outputVectors, dataset):
    """ Softmax cost function for word2vec models """
    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, assuming the softmax prediction function and cross
    # entropy loss.
    # Inputs:
    # - predicted: numpy ndarray, predicted word vector (\hat{v} in
    #   the written component or \hat{r} in an earlier version)
    # - target: integer, the index of the target word
    # - outputVectors: "output" vectors (as rows) for all tokens
    # - dataset: needed for negative sampling, unused here.
    # Outputs:
    # - cost: cross entropy cost for the softmax word prediction
    # - gradPred: the gradient with respect to the predicted word
    #   vector
    # - grad: the gradient with respect to all the other word
    #   vectors
    # We will not provide starter code for this function, but feel
    # free to reference the code you previously wrote for this
    # assignment!
    prods = np.dot(outputVectors, predicted)  # (V,)
    probs = softmax(prods)                    # (V,)
    cost = -np.log(probs[target])             # scalar
    dscore = probs                            # aliases probs, which is no longer needed
    dscore[target] -= 1.0                     # y_hat - y
    gradPred = np.dot(dscore, outputVectors)  # (D,)
    grad = np.outer(dscore, predicted)        # (V, D)
    return cost, gradPred, grad
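A quick, hedged smoke test for the function above (all shapes and values are assumptions): if every output vector is identical, the predicted distribution is uniform, so the cost must be log(V).

import numpy as np

V, D = 5, 3
predicted = np.random.randn(D)
outputVectors = np.tile(np.random.randn(D), (V, 1))  # V identical rows
cost, gradPred, grad = softmaxCostAndGradient(predicted, 2, outputVectors, None)
assert np.isclose(cost, np.log(V))  # uniform distribution over V words
assert gradPred.shape == (D,) and grad.shape == (V, D)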
Example 6: add_model
def add_model(self, input_data):
    """Adds a linear-layer plus a softmax transformation
    The core transformation for this model which transforms a batch of input
    data into a batch of predictions. In this case, the mathematical
    transformation effected is
    y = softmax(xW + b)
    Hint: Make sure to create tf.Variables as needed. Also, make sure to use
    tf.name_scope to ensure that your name spaces are clean.
    Hint: For this simple use-case, it's sufficient to initialize both weights W
    and biases b with zeros.
    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    ### YOUR CODE HERE
    with tf.variable_scope("model"):
        W = tf.get_variable("W", shape=[self.config.n_features, self.config.n_classes],
                            initializer=tf.random_normal_initializer(0.5, 0.1))
        b = tf.get_variable("b", shape=[self.config.n_classes],
                            initializer=tf.constant_initializer(0.0))
        # Use the input_data argument rather than reaching for
        # self.input_placeholder directly.
        affine_transformation = tf.matmul(input_data, W) + b
        out = softmax(affine_transformation)
    ### END YOUR CODE
    return out
Example 7: softmaxRegression
def softmaxRegression(features, labels, weights, regularization=0.0, nopredictions=False):
    """ Softmax Regression """
    # Implement softmax regression with weight regularization.
    # Inputs:
    # - features: feature vectors, each row is a feature vector
    # - labels: labels corresponding to the feature vectors
    # - weights: weights of the regressor
    # - regularization: L2 regularization constant
    # Output:
    # - cost: cost of the regressor
    # - grad: gradient of the regressor cost with respect to its
    #   weights
    # - pred: label predictions of the regressor (you might find
    #   np.argmax helpful)
    prob = softmax(features.dot(weights))
    if len(features.shape) > 1:
        N = features.shape[0]
    else:
        N = 1
    # A vectorized implementation of
    # 1/N * sum(cross_entropy(x_i, y_i)) + regularization/2 * |w|^2
    cost = np.sum(-np.log(prob[range(N), labels])) / N
    cost += 0.5 * regularization * np.sum(weights ** 2)
    ### YOUR CODE HERE: compute the gradients and predictions
    # One standard completion (replacing the original NotImplementedError):
    delta = prob.copy()
    delta[range(N), labels] -= 1.0  # softmax cross-entropy gradient w.r.t. scores
    grad = features.T.dot(delta) / N + regularization * weights
    pred = np.argmax(prob, axis=1)
    ### END YOUR CODE
    if nopredictions:
        return cost, grad
    else:
        return cost, grad, pred
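A tiny, hedged usage example for the completed function (shapes and values assumed): with zero weights the predicted distribution is uniform, so the unregularized cost is log(2) for two classes.

import numpy as np

features = np.random.randn(4, 3)  # 4 samples, 3 features
labels = np.array([0, 1, 1, 0])
weights = np.zeros((3, 2))        # 2 classes
cost, grad, pred = softmaxRegression(features, labels, weights, regularization=0.0)
assert np.isclose(cost, np.log(2))
assert pred.shape == (4,)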
Example 8: add_model
def add_model(self, input_data):
    """Adds a linear-layer plus a softmax transformation
    The core transformation for this model which transforms a batch of input
    data into a batch of predictions. In this case, the mathematical
    transformation effected is
    y = softmax(xW + b)
    Hint: Make sure to create tf.Variables as needed. Also, make sure to use
    tf.name_scope to ensure that your name spaces are clean.
    Hint: For this simple use-case, it's sufficient to initialize both weights W
    and biases b with zeros.
    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    ### YOUR CODE HERE
    with tf.variable_scope('softmax'):
        W = tf.get_variable("weights", (self.config.n_features, self.config.n_classes),
                            initializer=tf.constant_initializer(0.0))
        b = tf.get_variable("bias", (self.config.n_classes,),
                            initializer=tf.constant_initializer(0.0))
    out = softmax(tf.matmul(input_data, W) + b)
    ### END YOUR CODE
    return out
Example 9: add_model
def add_model(self, input_data):
    """Adds a linear-layer plus a softmax transformation
    The core transformation for this model which transforms a batch of input
    data into a batch of predictions. In this case, the mathematical
    transformation effected is
    y = softmax(xW + b)
    Hint: Make sure to create tf.Variables as needed. Also, make sure to use
    tf.name_scope to ensure that your name spaces are clean.
    Hint: For this simple use-case, it's sufficient to initialize both weights W
    and biases b with zeros.
    Args:
      input_data: A tensor of shape (batch_size, n_features).
    Returns:
      out: A tensor of shape (batch_size, n_classes)
    """
    ### YOUR CODE HERE
    n_features, n_classes = self.config.n_features, self.config.n_classes
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.zeros([n_features, n_classes]),
            name='weights')
        biases = tf.Variable(tf.zeros([n_classes]),
                             name='biases')
        logits = tf.matmul(input_data, weights) + biases
        out = softmax(logits)
    ### END YOUR CODE
    return out
Example 10: softmaxCostAndGradient
def softmaxCostAndGradient(predicted, target, outputVectors, dataset):
    """ Softmax cost function for word2vec models """
    # Inputs:
    # - predicted: numpy ndarray, predicted word vector (\hat{v} in
    #   the written component or \hat{r} in an earlier version)
    # - target: integer, the index of the target word
    # - outputVectors: "output" vectors (as rows) for all tokens
    # - dataset: needed for negative sampling, unused here.
    # Outputs:
    # - cost: cross entropy cost for the softmax word prediction
    # - gradPred: the gradient with respect to the predicted word
    #   vector
    # - grad: the gradient with respect to all the other word
    #   vectors
    y = np.zeros((outputVectors.shape[0],))
    y[target] = 1.0
    y_hat = softmax(np.dot(outputVectors, predicted))
    cost = -np.dot(y, np.log(y_hat))
    gradPred = -outputVectors[target, :] + np.dot(outputVectors.T, y_hat)
    grad = np.outer(y_hat - y, predicted)
    return cost, gradPred, grad
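For reference, the vectorized expressions above implement the standard softmax cross-entropy gradients for word2vec. Writing U for outputVectors, v_c for predicted, and y for the one-hot target:

\hat{y} = \mathrm{softmax}(U v_c), \qquad J(v_c) = -\log \hat{y}_{target}

\frac{\partial J}{\partial v_c} = U^\top (\hat{y} - y), \qquad
\frac{\partial J}{\partial U} = (\hat{y} - y)\, v_c^\top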
Example 11: forward_backward_prop
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network.
    Compute the forward propagation, the cross-entropy cost, and the
    backward propagation for the gradients of all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    ### YOUR CODE HERE: forward propagation
    # data: N x Dx, W1: Dx x H, b1: 1 x H
    a = data.dot(W1) + b1
    h = sigmoid(a)
    # h: N x H, W2: H x Dy, b2: 1 x Dy
    t = h.dot(W2) + b2
    y_hat = softmax(t)
    # y_hat: N x Dy, labels: N x Dy (one-hot)
    probs = labels * y_hat
    cost = np.sum(-np.log(probs.sum(axis=1)))
    ### END YOUR CODE
    ### YOUR CODE HERE: backward propagation
    # softmax-with-cross-entropy gradient
    dJdt = (y_hat - labels)  # N x Dy
    # b2 grad sums over the batch dimension
    gradb2 = np.sum(dJdt, 0)
    # h: N x H, dJdt: N x Dy
    gradW2 = h.T.dot(dJdt)  # H x Dy
    # dJdt: N x Dy, W2: H x Dy
    dJdh = dJdt.dot(W2.T)
    # h: N x H
    dhda = sigmoid_grad(h)
    # data: N x Dx, dhda: N x H, dJdh: N x H
    gradW1 = data.T.dot(dhda * dJdh)
    # dhda: N x H, dJdh: N x H
    gradb1 = np.sum(dhda * dJdh, 0)
    ### END YOUR CODE
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))
    return cost, grad
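These forward_backward_prop implementations are usually verified with central-difference gradient checking. The assignment ships a gradcheck_naive helper; the standalone version below is a hedged sketch, not the assignment's code.

import numpy as np

def numeric_grad_check(f, params, eps=1e-6, tol=1e-5, n_checks=10):
    """Compare f's analytic gradient against central differences at `params`.

    f maps a flat parameter vector to (cost, grad); assumes params.size >= n_checks.
    """
    _, analytic = f(params)
    for i in np.random.choice(params.size, size=n_checks, replace=False):
        old = params[i]
        params[i] = old + eps
        cost_plus, _ = f(params)
        params[i] = old - eps
        cost_minus, _ = f(params)
        params[i] = old  # restore
        numeric = (cost_plus - cost_minus) / (2 * eps)
        assert abs(numeric - analytic[i]) <= tol * max(1.0, abs(numeric))

# e.g. numeric_grad_check(lambda p: forward_backward_prop(data, labels, p, dimensions), params)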
Example 12: forward_backward_prop
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network.
    Compute the forward propagation, the cross-entropy cost, and the
    backward propagation for the gradients of all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    ### YOUR CODE HERE: forward propagation
    hidden = sigmoid(data.dot(W1) + b1)
    prediction = softmax(hidden.dot(W2) + b2)
    cost = -np.sum(np.log(prediction) * labels)
    ### END YOUR CODE
    ### YOUR CODE HERE: backward propagation
    delta = prediction - labels
    gradW2 = hidden.T.dot(delta)
    gradb2 = np.sum(delta, axis=0)
    hidden_delta = delta.dot(W2.T) * sigmoid_grad(hidden)
    gradW1 = data.T.dot(hidden_delta)
    gradb1 = np.sum(hidden_delta, axis=0)
    ### END YOUR CODE
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))
    return cost, grad
Example 13: softmaxCostAndGradient
def softmaxCostAndGradient(predicted, target, outputVectors, dataset):
    """ Softmax cost function for word2vec models """
    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, assuming the softmax prediction function and cross
    # entropy loss.
    # Inputs:
    # - predicted: numpy ndarray, predicted word vector (\hat{v} in
    #   the written component or \hat{r} in an earlier version)
    # - target: integer, the index of the target word
    # - outputVectors: "output" vectors (as rows) for all tokens
    # - dataset: needed for negative sampling, unused here.
    # Outputs:
    # - cost: cross entropy cost for the softmax word prediction
    # - gradPred: the gradient with respect to the predicted word
    #   vector
    # - grad: the gradient with respect to all the other word
    #   vectors
    # We will not provide starter code for this function, but feel
    # free to reference the code you previously wrote for this
    # assignment!
    ### YOUR CODE HERE
    '''
    Keep track of dims:
        D - dim of word vector
        V - number of words
    predicted     : (D,)
    target        : integer
    outputVectors : (V, D)
    cost          : float
    gradPred      : (D,)
    grad          : (V, D)
    '''
    predicted = predicted.reshape(-1, 1)
    scores = outputVectors.dot(predicted)  # (V, 1)
    probs = softmax(scores.T)              # (1, V)
    targetProb = probs[0, target]
    cost = -np.log(targetProb)
    scores_exp = np.exp(scores)            # (V, 1)
    scores_exp_sum = np.sum(scores_exp)    # float
    gradPred = -outputVectors[target, :] + np.sum(scores_exp * outputVectors, axis=0) / scores_exp_sum  # (D,)
    grad = scores_exp.dot(predicted.T) / scores_exp_sum  # (V, D)
    grad[target, :] -= predicted.reshape(-1)
    ### END YOUR CODE
    return cost, gradPred, grad
Example 14: forward_backward_prop
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network.
    Compute the forward propagation, the cross-entropy cost, and the
    backward propagation for the gradients of all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    ### YOUR CODE HERE: forward propagation
    # data : N x Dx
    # W1   : Dx x H
    # b1   : 1 x H
    # W2   : H x Dy
    # b2   : 1 x Dy
    N = data.shape[0]
    z1 = data.dot(W1) + b1
    a1 = sigmoid(z1)      # N x H
    z2 = a1.dot(W2) + b2
    a2 = softmax(z2)      # N x Dy
    # Unlike Examples 11 and 12, the cost (and hence every gradient) is
    # averaged over the batch here.
    cost = np.sum(-np.log(a2[labels == 1])) / N
    ### END YOUR CODE
    ### YOUR CODE HERE: backward propagation
    delta_score = a2 - labels            # N x Dy
    delta_score /= N
    gradW2 = np.dot(a1.T, delta_score)   # (H, N) x (N, Dy) = H x Dy
    gradb2 = np.sum(delta_score, axis=0)
    grad_h = np.dot(delta_score, W2.T)   # (N, Dy) x (Dy, H) = N x H
    grad_h = sigmoid_grad(a1) * grad_h
    gradW1 = np.dot(data.T, grad_h)
    gradb1 = np.sum(grad_h, axis=0)
    ### END YOUR CODE
    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))
    return cost, grad
Example 15: softmaxCostAndGradient
def softmaxCostAndGradient(predicted, target, outputVectors, dataset):
    """ Softmax cost function for word2vec models
    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, assuming the softmax prediction function and cross
    entropy loss.
    Arguments:
    predicted -- numpy ndarray, predicted word vector (\hat{v} in
                 the written component)
    target -- integer, the index of the target word
    outputVectors -- "output" vectors (as rows) for all tokens
    dataset -- needed for negative sampling, unused here.
    Return:
    cost -- cross entropy cost for the softmax word prediction
    gradPred -- the gradient with respect to the predicted word
                vector
    grad -- the gradient with respect to all the other word
            vectors
    We will not provide starter code for this function, but feel
    free to reference the code you previously wrote for this
    assignment!
    """
    ### YOUR CODE HERE
    v_c = predicted
    U = outputVectors
    N = U.shape[0]
    theta = np.zeros(N)
    for i in range(N):
        theta[i] = np.dot(U[i], v_c)
    y_hat = softmax(theta)
    cost = -np.log(y_hat[target])
    gradPred = -U[target]
    for i in range(N):
        gradPred += U[i] * y_hat[i]
    grad = np.zeros((N, len(v_c)))
    for i in range(N):
        if i == target:
            grad[i] = (y_hat[i] - 1) * v_c
        else:
            grad[i] = y_hat[i] * v_c
    ### END YOUR CODE
    return cost, gradPred, grad
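The loops above can be collapsed into the same vectorized form as Examples 10 and 13; a short sketch under the same variable names:

y_hat = softmax(outputVectors.dot(predicted))  # (V,)
cost = -np.log(y_hat[target])
delta = y_hat.copy()
delta[target] -= 1.0                           # y_hat - y
gradPred = outputVectors.T.dot(delta)          # (D,)
grad = np.outer(delta, predicted)              # (V, D)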