本文整理匯總了Python中pycuda.gpuarray.zeros方法的典型用法代碼示例。如果您正苦於以下問題:Python gpuarray.zeros方法的具體用法?Python gpuarray.zeros怎麽用?Python gpuarray.zeros使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊pycuda.gpuarray的用法示例。
在下文中一共展示了gpuarray.zeros方法的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: cuda_render
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def cuda_render(self, pts, face_set):
    """Render a depth image, a mask and a bounding box for a triangle mesh.

    Parameters
    ----------
    pts : numpy.ndarray
        2-D array of vertices; columns 0, 1 and 2 are read as x, y and z.
    face_set : numpy.ndarray
        2-D integer array; columns 0, 1 and 2 index into ``pts`` and give
        the three vertices of each face.

    Returns
    -------
    img : numpy.ndarray
        ``(res_y, res_x)`` float32 buffer read back from the device, with
        every entry still equal to the initial value 100 reset to 0.
    mask : numpy.ndarray
        ``(res_y, res_x)`` boolean array built from the kernel's mask output.
    bbox_final : numpy.ndarray
        The four bounding-box values written by the kernel, cast to int.
    """
    pts = pts.astype(np.float32)

    # Project onto the image plane (focal length * coordinate / z + principal
    # point) and snap to whole pixels.  ``np.int`` was removed in NumPy 1.24;
    # the builtin ``int`` is the type it used to alias, so results are equal.
    v = np.round(self.fy * pts[:, 1] / pts[:, 2] + self.cy).astype(int).astype(np.float32)
    u = np.round(self.fx * pts[:, 0] / pts[:, 2] + self.cx).astype(int).astype(np.float32)

    n_pixels = self.res_y * self.res_x
    # 100 acts as the "nothing rendered here" sentinel; it is zeroed below.
    depth_b = gpuarray.zeros(n_pixels, dtype=np.float32) + 100
    depth_mask = np.zeros(n_pixels, dtype=np.float32)

    # Starts as [9999, 9999, 0, 0] -- presumably (min, min, max, max) seeds
    # that the kernel tightens; TODO confirm against the kernel source.
    bbox = gpuarray.zeros(4, dtype=np.float32)
    bbox[0:2] = np.array([9999, 9999], dtype=np.float32)

    n_faces = face_set.shape[0]
    max_idx = np.ones(n_faces, dtype=np.float32)
    # Always one block more than the truncated quotient, so every face is
    # covered even when n_faces is not a multiple of n_block.
    grid_n = int(n_faces / self.n_block) + 1

    i0, i1, i2 = face_set[:, 0], face_set[:, 1], face_set[:, 2]
    self.rendering(
        drv.In(v[i0]), drv.In(v[i1]), drv.In(v[i2]),
        drv.In(u[i0]), drv.In(u[i1]), drv.In(u[i2]),
        drv.In(pts[i0, 2]), drv.In(pts[i1, 2]), drv.In(pts[i2, 2]),
        depth_b, drv.In(max_idx), drv.Out(depth_mask), bbox,
        block=(self.n_block, 1, 1), grid=(grid_n, 1, 1))

    img = depth_b.get()
    img[img == 100] = 0
    img = np.reshape(img, (self.res_y, self.res_x))
    mask = np.reshape(depth_mask, (self.res_y, self.res_x)).astype(bool)
    bbox_final = bbox.get()
    return img, mask, bbox_final.astype(int)
示例2: coulomb_energy
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def coulomb_energy(f, param):
    """
    Compute the Coulomb energy of a region.

    Arguments
    ----------
    f    : class, region of the field array for which the Coulomb energy is
                  wanted. Its attributes ``q``, ``xq`` and ``E`` are read.
    param: class, parameters related to the surface. Its attributes
                  ``REAL``, ``qe``, ``Na`` and ``E_0`` are read.

    Returns
    --------
    E_coul: float, Coulomb energy.
    """
    n_charges = len(f.q)
    # One accumulator slot per charge; coulomb_direct is presumably what
    # fills it in place, since nothing else writes to it before the sum.
    per_charge = numpy.zeros(n_charges, param.REAL)
    x_coord, y_coord, z_coord = f.xq[:, 0], f.xq[:, 1], f.xq[:, 2]
    coulomb_direct(x_coord, y_coord, z_coord, f.q, per_charge)

    joules_per_cal = 4.184
    scale = param.qe**2 * param.Na * 1e-3 * 1e10 / (joules_per_cal * param.E_0)

    # The factor 0.5 halves the sum of the per-charge contributions.
    return numpy.sum(per_charge) * 0.5 * scale / (4 * pi * f.E)
示例3: zeros_cuda
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def zeros_cuda(shape):
    """Return a float32 array of zeros allocated through ``cuda_array.zeros``.

    Parameters
    ----------
    shape : tuple
        Dimensions of the array to allocate.

    Returns
    -------
    gpuarray
        Zero-filled array with dtype ``float32`` (a GPUArray, assuming
        ``cuda_array`` is the ``pycuda.gpuarray`` module).

    Examples
    --------
    >>> a = zeros_cuda((3, 2))
    >>> type(a)
    <class 'pycuda.gpuarray.GPUArray'>
    """
    zero_filled = cuda_array.zeros(shape, dtype=float32)
    return zero_filled
示例4: init_buffers
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def init_buffers(self):
    """Allocate local output/scratch buffers and swap IPC handles with peers.

    The output buffer takes the shape and dtype of the op's first argument.
    The scratch buffer holds ``segment_size * n_devs`` elements. Every id in
    ``self.device_ids`` takes one turn as broadcast root: this device sends
    its own handles on its turn, and on every other turn it receives a
    peer's handles and records them in the ``*_buff_dict`` tables.
    """
    first_arg = self.op.args[0]
    buf_shape = first_arg.tensor_description().shape
    buf_dtype = first_arg.tensor_description().dtype
    device_count = len(self.op.device_ids)
    element_count = first_arg.tensor_description().axes.size
    per_device = calculate_segment_size(element_count, device_count)

    # Local device buffers, registered under this device's own id.
    self.output_buff = gpuarray.zeros(buf_shape, buf_dtype)
    self.scratch_buff = gpuarray.zeros(per_device * device_count, buf_dtype)
    self.output_buff_dict[self.device_id] = self.output_buff.gpudata
    self.scratch_buff_dict[self.device_id] = self.scratch_buff.gpudata

    # Handles that let other processes reach this device's buffers and event.
    local_output_ipc = drv.mem_get_ipc_handle(self.output_buff.gpudata)
    local_scratch_ipc = drv.mem_get_ipc_handle(self.scratch_buff.gpudata)
    local_event_ipc = self.event.ipc_handle()
    outgoing = (self.device_id, local_output_ipc, local_scratch_ipc, local_event_ipc)

    # Every participant must issue the broadcasts in the same root order,
    # so the loop order over device_ids is significant.
    for root in self.device_ids:
        if root != self.device_id:
            received = self.comm.bcast(None, root=root)
            peer_id, peer_output_ipc, peer_scratch_ipc, peer_event_ipc = received
            peer_output = drv.IPCMemoryHandle(peer_output_ipc)
            peer_scratch = drv.IPCMemoryHandle(peer_scratch_ipc)
            peer_event = drv.Event.from_ipc_handle(peer_event_ipc)
            self.output_buff_dict[peer_id] = peer_output
            self.scratch_buff_dict[peer_id] = peer_scratch
            self.event_buff_dict[peer_id] = peer_event
        else:
            self.comm.bcast(outgoing, root=root)
示例5: zeros
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def zeros(self, *args, **kw):
    """Call ``np.zeros`` with ``dtype`` forced to ``self.floattype``.

    Positional and keyword arguments are forwarded unchanged, except that
    any ``dtype`` keyword supplied by the caller is replaced.
    """
    forwarded = dict(kw, dtype=self.floattype)
    return np.zeros(*args, **forwarded)
示例6: predict
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def predict(self, x, stream=None):
    """Run ``x`` through every layer and return the final buffer's contents.

    Parameters
    ----------
    x : array-like
        Input batch. Anything that is not exactly a ``numpy.ndarray`` is
        converted to a float32 array. If it has fewer elements than the
        input buffer it is zero-padded up to the buffer size.
    stream : optional
        Stream passed to every transfer and layer evaluation; defaults to
        ``self.stream``.

    Returns
    -------
    The array read back from ``self.network_mem[-1]``. When that array is
    2-D, only its first ``batch_size`` rows are returned.

    Raises
    ------
    Exception
        If ``x`` has more elements than the input buffer.
    """
    if stream is None:
        stream = self.stream

    if type(x) != np.ndarray:
        x = np.array(x, dtype=np.float32)

    input_mem = self.network_mem[0]
    if x.size == input_mem.size:
        input_mem.set_async(x, stream=stream)
    else:
        if x.size > input_mem.size:
            raise Exception("Error: batch size too large for input.")
        # Zero-pad a short batch so it fills the fixed-size input buffer.
        x0 = np.zeros((input_mem.size,), dtype=np.float32)
        x0[0:x.size] = x.ravel()
        input_mem.set_async(x0.reshape(input_mem.shape), stream=stream)

    # A 2-D input is one sample per row; anything else is a single sample.
    batch_size = x.shape[0] if len(x.shape) == 2 else 1

    # ``range`` instead of the Python-2-only ``xrange`` (NameError on Python 3).
    # Layer i reads network_mem[i] and writes network_mem[i + 1].
    for i in range(len(self.network)):
        self.network[i].eval_(x=self.network_mem[i], y=self.network_mem[i + 1],
                              batch_size=batch_size, stream=stream)

    # NOTE(review): get_async is used without an explicit synchronize here;
    # presumably the caller syncs the stream before reading -- confirm.
    y = self.network_mem[-1].get_async(stream=stream)
    if len(y.shape) == 2:
        y = y[0:batch_size, :]
    return y
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:36,代碼來源:deep_neural_network.py
示例7: nppiFilter
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def nppiFilter(src, kernel, roi=None, dst=None):
    """Apply a 2-D float32 filter to an image via the NPP ``nppiFilter`` calls.

    Parameters
    ----------
    src : array with ``shape``, ``dtype`` and ``gpudata`` attributes
        Source image, shape ``(H, W)`` or ``(H, W, C)`` with ``C`` from 1 to 4.
    kernel : array with ``shape``, ``dtype`` and ``gpudata`` attributes
        Filter kernel, shape ``(kh, kw)`` or ``(kh, kw, 1)``.
    roi : sequence of four ints, optional
        ``[x, y, width, height]``; defaults to the whole source image.
    dst : array, optional
        Destination image; a zero-filled array shaped like ``src`` is
        allocated when omitted.

    Returns
    -------
    The destination array.

    Raises
    ------
    RuntimeError
        For unsupported ranks, channel counts or dtypes, and when the NPP
        call returns a negative status.
    """
    src_rank = len(src.shape)
    if src_rank < 2 or src_rank > 3 or (src_rank == 3 and src.shape[-1] > 4):
        raise RuntimeError("Only 2D convolution supported")
    kernel_rank = len(kernel.shape)
    if kernel_rank < 2 or kernel_rank > 3 or (kernel_rank == 3 and kernel.shape[-1] != 1):
        raise RuntimeError("Only 2D convolution supported")

    if roi is None:
        # x, y, width, height
        roi = [0, 0, src.shape[1], src.shape[0]]

    if src.dtype != np.float32 or kernel.dtype != np.float32:
        raise RuntimeError("Not supported")

    if dst is None:
        dst = gpuarray.zeros(src.shape, src.dtype)
    # src is float32 at this point, so this also checks dst against src.
    if dst.dtype != np.float32:
        raise RuntimeError("Unsupported destination image.")

    # Validate the channel count before any pointer arithmetic. A 3-D image
    # with zero channels previously fell through with no function selected,
    # and the fallback branch raised a misspelt (undefined) exception name.
    channels = 1 if src_rank == 2 else src.shape[-1]
    if channels == 1:
        nppfunc = nppi.nppiFilter_32f_C1R
    elif channels == 2:
        nppfunc = nppi.nppiFilter_32f_C2R
    elif channels == 3:
        nppfunc = nppi.nppiFilter_32f_C3R
    elif channels == 4:
        nppfunc = nppi.nppiFilter_32f_C4R
    else:
        raise RuntimeError("Not supported")

    es = 4  # bytes per float32 element
    pixel_bytes = channels * es
    # Advance both base pointers to the ROI's top-left pixel.
    srcp = int(src.gpudata) + (roi[0] + roi[1] * src.shape[1]) * pixel_bytes
    dstp = int(dst.gpudata) + (roi[0] + roi[1] * dst.shape[1]) * pixel_bytes
    # Row pitch in bytes. The destination is assumed to have the same
    # channel count as the source.
    src_step = src.shape[1] * pixel_bytes
    dst_step = dst.shape[1] * pixel_bytes

    oSizeROI = NppiSize()
    oSizeROI.width = roi[2]
    oSizeROI.height = roi[3]

    kernelp = int(kernel.gpudata)
    kernelSize = NppiSize()
    kernelSize.width = kernel.shape[1]
    kernelSize.height = kernel.shape[0]

    # Anchor the kernel at its centre element.
    anchor = NppiPoint()
    anchor.x = kernel.shape[1] // 2
    anchor.y = kernel.shape[0] // 2

    status = nppfunc(cast(srcp, POINTER(c_float)), src_step,
                     cast(dstp, POINTER(c_float)), dst_step,
                     oSizeROI, cast(kernelp, POINTER(c_float)), kernelSize, anchor)
    if status < 0:
        raise RuntimeError("Npp library returned %d" % status)
    return dst
示例8: __init__
# 需要導入模塊: from pycuda import gpuarray [as 別名]
# 或者: from pycuda.gpuarray import zeros [as 別名]
def __init__(self, num_inputs=None, num_outputs=None, weights=None, b=None, stream=None,
             relu=False, sigmoid=False, delta=None):
    """Set up the layer's weights, bias, activation flags and launch geometry.

    Parameters
    ----------
    num_inputs, num_outputs : int, optional
        Layer dimensions. When either is omitted, both are taken from the
        shape of ``weights`` (rows are outputs, columns are inputs).
    weights : array-like or GPUArray, optional
        Weight matrix. When omitted, random values in [-0.5, 0.5) are drawn
        with shape ``(num_outputs, num_inputs)``.
    b : array-like or GPUArray, optional
        Bias vector; defaults to zeros of length ``num_outputs``.
    stream : optional
        Stream used for the asynchronous host-to-device uploads.
    relu, sigmoid : bool
        Activation flags, stored as ``np.int32``.
    delta : float, optional
        Stored as ``np.float32``; defaults to 0.001.

    Raises
    ------
    TypeError
        If ``weights`` is omitted and a dimension is missing.
    """
    self.stream = stream
    self.delta = np.float32(0.001 if delta is None else delta)

    if weights is None:
        if num_inputs is None or num_outputs is None:
            # Same exception type np.random.rand would raise for None,
            # but with a message that points at the real cause.
            raise TypeError("num_inputs and num_outputs are required when weights is not given")
        weights = np.random.rand(num_outputs, num_inputs) - .5

    if isinstance(weights, pycuda.gpuarray.GPUArray):
        self.weights = weights
    else:
        self.weights = gpuarray.to_gpu_async(np.array(weights, dtype=np.float32),
                                             stream=self.stream)

    # The dimensions are set exactly once, here. An earlier unconditional
    # np.int32(num_inputs) failed on None whenever the sizes were meant to
    # be inferred from the weights.
    if num_inputs is None or num_outputs is None:
        self.num_inputs = np.int32(self.weights.shape[1])
        self.num_outputs = np.int32(self.weights.shape[0])
    else:
        self.num_inputs = np.int32(num_inputs)
        self.num_outputs = np.int32(num_outputs)

    if b is None:
        b = gpuarray.zeros((self.num_outputs,), dtype=np.float32)
    if isinstance(b, pycuda.gpuarray.GPUArray):
        self.b = b
    else:
        self.b = gpuarray.to_gpu_async(np.array(b, dtype=np.float32), stream=self.stream)

    self.relu = np.int32(relu)
    self.sigmoid = np.int32(sigmoid)

    threads_per_block = 32
    self.block = (threads_per_block, 1, 1)
    # Integer ceil-division: enough blocks to give every output one thread,
    # without depending on true-division semantics.
    n_blocks = (int(self.num_outputs) + threads_per_block - 1) // threads_per_block
    self.grid = (n_blocks, 1, 1)
開發者ID:PacktPublishing,項目名稱:Hands-On-GPU-Programming-with-Python-and-CUDA,代碼行數:47,代碼來源:deep_neural_network.py