This page collects typical usage examples of the Python function skcuda.linalg.dot. If you have been wondering what skcuda.linalg.dot does, how to call it, or where to find working examples, the selected code samples below should help.
The following presents 15 code examples of the dot function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
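
Before the examples, here is a minimal usage sketch of my own (not one of the collected samples) showing the setup every example below assumes: a CUDA context from pycuda and a one-time call to linalg.init() before any skcuda.linalg routine:

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.gpuarray as gpuarray
from skcuda import linalg

linalg.init()  # required once before using skcuda.linalg functions
a = np.random.rand(4, 4).astype(np.float32)
b = np.random.rand(4, 4).astype(np.float32)
c_gpu = linalg.dot(gpuarray.to_gpu(a), gpuarray.to_gpu(b))  # GEMM on the GPU
assert np.allclose(c_gpu.get(), a.dot(b), atol=1e-5)
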
Example 1: conv2d_forward_batch
def conv2d_forward_batch(self, inputs, params, bias, outputs,
                         padding, stride):
    num_filters = params.shape[0]
    num_images, input_rows, input_cols, num_input_maps = inputs.shape
    kernel_shape = params.shape[1:]
    num_output_pixels = outputs.shape[1] * outputs.shape[2]
    num_kernel_params = np.prod(kernel_shape)
    out_shape = (num_output_pixels, num_filters)
    num_cuda_kernels = num_output_pixels * num_input_maps
    for i in range(num_images):
        col = self.zeros((num_output_pixels, num_kernel_params))
        _im2col_fp32_impl(np.int32(num_cuda_kernels), inputs[i],
                          np.int32(input_rows), np.int32(input_cols),
                          np.int32(kernel_shape[0]),
                          np.int32(kernel_shape[1]),
                          np.int32(padding), np.int32(padding),
                          np.int32(stride[0]), np.int32(stride[1]),
                          np.int32(outputs.shape[2]),
                          np.int32(num_input_maps),
                          col.gpudata,
                          block=(NUM_CUDA_THREADS, 1, 1),
                          grid=(get_blocks(num_cuda_kernels), 1))
        reshaped_params = params.reshape(num_filters, num_kernel_params)
        culinalg.dot(col, reshaped_params, transb='T',
                     out=outputs[i].reshape(out_shape))
    flat_outputs = flatten_all_but_last(outputs)
    self.add_mv(flat_outputs, bias, flat_outputs)
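
The dot call above relies on the im2col trick: each output position's receptive field is unrolled into one row of col, so the whole convolution collapses into a single matrix product. A shape-only NumPy sketch with made-up sizes:

import numpy as np
P, K, F = 36, 27, 8  # output pixels, kernel params, filters (hypothetical sizes)
col = np.random.rand(P, K).astype(np.float32)     # stand-in for the im2col output
params = np.random.rand(F, K).astype(np.float32)  # flattened filter bank
out = col.dot(params.T)  # same contraction as culinalg.dot(..., transb='T')
assert out.shape == (P, F)  # one row per output pixel, one column per filter
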
Example 2: dot3
def dot3(A, b):
    ''' Calculates matrix multiplication "b.T*A*b" on GPU. '''
    #print("dot3 " + str(A.shape) + " " + str(b.shape))
    # send A to GPU
    A_gpu = gpuarray.to_gpu(A)
    # send b to GPU
    b_gpu = gpuarray.to_gpu(b)
    temp_gpu = linalg.dot(A_gpu, b_gpu)
    A_gpu.gpudata.free()
    del A_gpu
    # transpose b on GPU
    bt_gpu = linalg.transpose(b_gpu)
    # remove b
    b_gpu.gpudata.free()
    del b_gpu
    out_gpu = linalg.dot(bt_gpu, temp_gpu)
    return out_gpu.get()
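
Since linalg.dot accepts transa/transb flags, the explicit linalg.transpose step above can be avoided; a hypothetical variant of dot3 along those lines:

def dot3_fused(A, b):
    # same result as dot3, but the GEMM call handles the transpose of b
    A_gpu = gpuarray.to_gpu(A)
    b_gpu = gpuarray.to_gpu(b)
    temp_gpu = linalg.dot(A_gpu, b_gpu)                # A*b
    out_gpu = linalg.dot(b_gpu, temp_gpu, transa='T')  # b.T*(A*b)
    return out_gpu.get()
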
Example 3: dot3
def dot3(A, b):
    ''' Calculates matrix multiplication "b.T*A*b" on GPU.
        A has to be n x n. '''
    #print("dot3 " + str(A.shape) + " " + str(b.shape))
    # Make sure we don't run out of memory on the GPU;
    # 629088256 float64 elements is roughly 4.7 GiB (presumably a budget
    # chosen for the card this was written on)
    if (A.size + 2 * b.size) <= 629088256:
        # send A to GPU
        A_gpu = gpuarray.to_gpu(A)
        # send b to GPU
        b_gpu = gpuarray.to_gpu(b)
        temp_gpu = linalg.dot(A_gpu, b_gpu)
        A_gpu.gpudata.free()
        del A_gpu
        # transpose b on GPU
        bt_gpu = linalg.transpose(b_gpu)
        # remove b
        b_gpu.gpudata.free()
        del b_gpu
        out_gpu = linalg.dot(bt_gpu, temp_gpu)
        return out_gpu.get()
    else:
        print("Too big for GPU, using CPU.")
        return np.dot(np.dot(b.T, A), b)
Example 4: cuda_dot3
def cuda_dot3(A, b):
    print("cuda_dot3", A.shape, b.shape)
    # send b to GPU
    b_gpu = gpuarray.to_gpu(b)
    # transpose b on GPU
    bt_gpu = linalg.transpose(b_gpu)
    # remove b for now
    b_gpu.gpudata.free()
    del b_gpu
    # send A to GPU
    A_gpu = gpuarray.to_gpu(A)
    temp_gpu = linalg.dot(bt_gpu, A_gpu)
    bt_gpu.gpudata.free()
    del bt_gpu
    A_gpu.gpudata.free()
    del A_gpu
    # send b to GPU again
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(temp_gpu, b_gpu)
    temp_gpu.gpudata.free()
    del temp_gpu
    b_gpu.gpudata.free()
    del b_gpu
    # theoretically possible to move the result into RAM, force cleanup on
    # the GPU and then return from RAM, but most likely not necessary
    return c_gpu.get()
Example 5: test_dot_matrix_h_complex128
def test_dot_matrix_h_complex128(self):
    a = np.asarray(np.random.rand(2, 4) + 1j*np.random.rand(2, 4), np.complex128)
    b = np.asarray(np.random.rand(2, 2) + 1j*np.random.rand(2, 2), np.complex128)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu, 'c')  # 'c': conjugate-transpose a first
    assert np.allclose(np.dot(a.conj().T, b), c_gpu.get())
    # repeat with Fortran-ordered (column-major) inputs
    a = a.astype(np.complex128, order="F", copy=True)
    b = b.astype(np.complex128, order="F", copy=True)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu, 'c')
    assert np.allclose(np.dot(a.conj().T, b), c_gpu.get())
Example 6: test_dot_vector_complex128
def test_dot_vector_complex128(self):
    a = np.asarray(np.random.rand(5), np.complex128)
    b = np.asarray(np.random.rand(5), np.complex128)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c = linalg.dot(a_gpu, b_gpu)
    assert np.allclose(np.dot(a, b), c)
    a = a.astype(np.complex128, order="F", copy=True)
    b = b.astype(np.complex128, order="F", copy=True)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c = linalg.dot(a_gpu, b_gpu)
    assert np.allclose(np.dot(a, b), c)
Example 7: initParallelAlgorithms
def initParallelAlgorithms():
    global bitonicSort_
    fin = open("ParallelAlgorithms/bitonicSort.cu")
    mod = SourceModule(fin.read())
    fin.close()
    bitonicSort_ = mod.get_function("bitonicSort")

    global finishCSM_
    global getSumSquares_
    fin = open("ParallelAlgorithms/CSMHelper.cu")
    mod = SourceModule(fin.read())
    fin.close()
    finishCSM_ = mod.get_function("finishCSM")
    getSumSquares_ = mod.get_function("getSumSquares")

    # Run each of the algorithms on dummy data so that they're pre-compiled
    # 1) Bitonic sort
    X = np.random.randn(16, 16).astype(np.float32)  # float32 matches the 4-byte shared allocation below
    N = np.int32(16)
    NPow2 = N
    NThreads = int(N) // 2  # integer division: block dimensions must be ints
    XG = gpuarray.to_gpu(X)
    bitonicSort_(XG, N, NPow2, block=(NThreads, 1, 1),
                 grid=(X.shape[0], 1), shared=4*NPow2)
    linalg.init()

    # 2) Other primitive operations
    NegXDotX = linalg.dot(XG, XG)
    XPlusX = skcuda.misc.add(XG, XG)
    XSqr = skcuda.misc.multiply(XG, XG)
    XSqr = skcuda.misc.sum(XSqr, 1)
    XPlusCol = skcuda.misc.add_matvec(XG, XSqr, 0)
Example 8: _dev_lin
def _dev_lin(self, devX, devW, devB):
    """Linear function on GPU.

    Returns:
        devH (gpuarray): GPU matrix with the result.
    """
    devH = misc.add_matvec(linalg.dot(devX, devW), devB, axis=1)
    return devH
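
For reference, this is the usual dense-layer formula H = X·W + B with the bias added to every row; a CPU sketch with hypothetical shapes, assuming misc.add_matvec(..., axis=1) broadcasts the vector across rows:

import numpy as np
X = np.random.rand(5, 3)  # inputs: (n_samples, n_in)
W = np.random.rand(3, 2)  # weights: (n_in, n_out)
B = np.random.rand(2)     # bias: one entry per output column
H = X.dot(W) + B          # NumPy broadcasting stands in for misc.add_matvec
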
Example 9: NNMF_gpu
def NNMF_gpu(X, r, tol, V=v0, W=w0, verbose=1):
    Vr = V[:, 0:r].copy()
    Wr = W[0:r, :].copy()
    X_gpu = gpuarray.to_gpu(X)
    V_gpu = gpuarray.to_gpu(Vr)
    W_gpu = gpuarray.to_gpu(Wr)
    # Frobenius norm at the previous step
    B_gpu = linalg.dot(V_gpu, W_gpu)
    L = linalg.norm(X_gpu - B_gpu)**2
    iteration = 0
    while True:
        # update V
        V_gpu *= linalg.dot(X_gpu, linalg.transpose(W_gpu))
        V_gpu /= linalg.dot(B_gpu, linalg.transpose(W_gpu))
        B_gpu = linalg.dot(V_gpu, W_gpu)
        # update W
        W_gpu *= linalg.dot(linalg.transpose(V_gpu), X_gpu)
        W_gpu /= linalg.dot(linalg.transpose(V_gpu), B_gpu)
        B_gpu = linalg.dot(V_gpu, W_gpu)
        Lnew = linalg.norm(X_gpu - B_gpu)**2
        if abs(Lnew - L) <= tol * (L + 1):
            break
        else:
            L = Lnew
        iteration += 1
        if verbose and iteration % 50 == 0:
            print("At iteration %i, the loss is %.2f" % (iteration, L))
    return V_gpu, W_gpu, iteration
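
The loop is the classic Lee-Seung multiplicative update scheme for X ≈ V·W. For comparison, one iteration in plain NumPy (my sketch, not part of the original):

import numpy as np
X = np.abs(np.random.rand(6, 4))  # nonnegative data
V = np.random.rand(6, 2)
W = np.random.rand(2, 4)
B = V.dot(W)
V *= X.dot(W.T) / B.dot(W.T)  # update V
B = V.dot(W)
W *= V.T.dot(X) / V.T.dot(B)  # update W
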
Example 10: _dot_matrix_tests
def _dot_matrix_tests(self, dtype, transa, transb):
    a = np.asarray(np.random.rand(4, 2), dtype)
    if transa == 'n':
        b = np.asarray(np.random.rand(2, 2), dtype)
    else:
        b = np.asarray(np.random.rand(4, 4), dtype)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu, transa, transb)
    aa = a if transa == 'n' else a.T
    bb = b if transb == 'n' else b.T
    assert np.allclose(np.dot(aa, bb), c_gpu.get())
    a = a.astype(dtype, order="F", copy=True)
    b = b.astype(dtype, order="F", copy=True)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu, transa, transb)
    assert np.allclose(np.dot(aa, bb), c_gpu.get())
Example 11: _dev_tanh
def _dev_tanh(self, devX, devW, devB):
    """Hyperbolic tangent function on GPU.

    Returns:
        devH (gpuarray): GPU matrix with the result.
    """
    devH = misc.add_matvec(linalg.dot(devX, devW), devB, axis=1)
    cumath.tanh(devH, out=devH)
    return devH
Example 12: dot
def dot(a, b):
    ''' Calculates matrix multiplication "a*b" on GPU. '''
    #print("dot " + str(a.shape) + " " + str(b.shape))
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    d_gpu = linalg.dot(a_gpu, b_gpu)
    return d_gpu.get()
Example 13: _dot_matrix_vector_tests
def _dot_matrix_vector_tests(self, dtype):
    # matrix @ vector
    a = np.asarray(np.random.rand(4, 4), dtype)
    b = np.asarray(np.random.rand(4), dtype)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu)
    assert np.allclose(np.dot(a, b), c_gpu.get())
    # vector @ matrix
    a = np.asarray(np.random.rand(4), dtype)
    b = np.asarray(np.random.rand(4, 4), dtype)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu)
    assert np.allclose(np.dot(a, b), c_gpu.get())
    # matrix @ column vector
    a = np.asarray(np.random.rand(4, 4), dtype)
    b = np.asarray(np.random.rand(4, 1), dtype)
    a_gpu = gpuarray.to_gpu(a)
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(a_gpu, b_gpu)
    assert np.allclose(np.dot(a, b), c_gpu.get())
Example 14: mldivide
def mldivide(A, B):
    ''' CULA would be necessary for this function to work. :-/ '''
    A_gpu = gpuarray.to_gpu(A)
    A_inv_gpu = linalg.inv(A_gpu)  # invert on the GPU
    A_gpu.gpudata.free()
    del A_gpu
    B_gpu = gpuarray.to_gpu(B)
    out_gpu = linalg.dot(A_inv_gpu, B_gpu)
    return out_gpu.get()
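
If a CPU fallback is acceptable, MATLAB's A\B is better served by a solver than by forming inv(A) explicitly; a minimal sketch:

import numpy as np

def mldivide_cpu(A, B):
    # solves A x = B directly, avoiding the cost and instability of inv(A)
    return np.linalg.solve(A, B)
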
Example 15: getCSMGPU
def getCSMGPU(XG, YG):
    tbegin = time.time()
    GPUNeg2 = gpuarray.to_gpu(np.array([-2.0], dtype=np.float32))
    YGT = linalg.transpose(YG)
    # row-wise squared norms of X and Y
    XSqr = skcuda.misc.multiply(XG, XG)
    XSqr = skcuda.misc.sum(XSqr, 1)
    YSqr = skcuda.misc.multiply(YG, YG)
    YSqr = skcuda.misc.sum(YSqr, 1)
    # C[i, j] = |x_i|^2 - 2*x_i.y_j + |y_j|^2
    C = linalg.dot(XG, YGT)
    C = skcuda.misc.multiply(GPUNeg2, C)
    skcuda.misc.add_matvec(C, XSqr, 0, C)
    skcuda.misc.add_matvec(C, YSqr, 1, C)
    return C
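
getCSMGPU assembles the cross squared-distance matrix through the identity ||x - y||^2 = ||x||^2 - 2 x·y + ||y||^2. The same computation in NumPy (my sketch, not part of the original):

import numpy as np

def csm_cpu(X, Y):
    XSqr = (X**2).sum(axis=1)
    YSqr = (Y**2).sum(axis=1)
    # C[i, j] = |x_i|^2 - 2*x_i.y_j + |y_j|^2
    return XSqr[:, None] - 2.0 * X.dot(Y.T) + YSqr[None, :]
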