Python gpuarray.zeros方法代码示例

本文整理汇总了Python中pycuda.gpuarray.zeros方法的典型用法代码示例。如果您正苦于以下问题：Python gpuarray.zeros方法的具体用法？Python gpuarray.zeros怎么用？Python gpuarray.zeros使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pycuda.gpuarray的用法示例。

在下文中一共展示了gpuarray.zeros方法的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: cuda_render

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def cuda_render(self, pts, face_set):
        """Render a triangle mesh into a depth image with the ``self.rendering`` kernel.

        Parameters
        ----------
        pts : array
            Vertex coordinates; columns 0, 1 and 2 are read as x, y and z.
        face_set : integer array
            One row per triangle; columns 0, 1 and 2 index rows of ``pts``.

        Returns
        -------
        img : float32 array of shape ``(self.res_y, self.res_x)``
            Depth buffer; pixels still holding the initial value 100 are set to 0.
        mask : bool array of shape ``(self.res_y, self.res_x)``
            Boolean view of the ``depth_mask`` buffer filled by the kernel.
        bbox_final : integer array of length 4
            The ``bbox`` buffer after the kernel ran, cast to integers.
        """
        pts = pts.astype(np.float32)
        # Pixel coordinates: scale x/z and y/z by the focal lengths, shift by the
        # principal point, then snap to whole pixels.  ``np.int`` was only an
        # alias of the builtin ``int`` and no longer exists in current NumPy, so
        # the builtin is used directly (same resulting dtype).
        v = np.round(self.fy * pts[:, 1] / pts[:, 2] + self.cy).astype(int).astype(np.float32)
        u = np.round(self.fx * pts[:, 0] / pts[:, 2] + self.cx).astype(int).astype(np.float32)

        # Device-side depth buffer, pre-filled with 100 as the "nothing drawn" value.
        depth_b = gpuarray.zeros((self.res_y * self.res_x), dtype=np.float32) + 100
        # Host-side mask buffer; handed to the kernel through drv.Out below.
        depth_mask = np.zeros((self.res_y * self.res_x), dtype=np.float32)
        # Device-side bounding box; the first two entries start at 9999 and the
        # last two at 0 (presumably min/max accumulators - confirm in the kernel).
        bbox = gpuarray.zeros((4), dtype=np.float32)
        bbox[0:2] = np.array([9999, 9999], dtype=np.float32)

        # NOTE(review): an all-ones vector with one entry per face; its meaning is
        # defined by the kernel, which is not visible here.
        max_idx = np.ones((face_set.shape[0]), dtype=np.float32)
        # One more block than the floor of n_faces / n_block.
        grid_n = int((face_set.shape[0] / self.n_block)) + 1
        self.rendering(drv.In(v[face_set[:, 0]]), drv.In(v[face_set[:, 1]]), drv.In(v[face_set[:, 2]]),
                          drv.In(u[face_set[:, 0]]), drv.In(u[face_set[:, 1]]), drv.In(u[face_set[:, 2]]),
                          drv.In(pts[face_set[:, 0], 2]), drv.In(pts[face_set[:, 1], 2]), drv.In(pts[face_set[:, 2], 2]),
                          depth_b, drv.In(max_idx), drv.Out(depth_mask), bbox,
                          block=(self.n_block, 1, 1), grid=(grid_n, 1, 1))

        img = depth_b.get()
        # Pixels the kernel never touched still hold the initial 100; report them as 0.
        img[img == 100] = 0
        img = np.reshape(img, (self.res_y, self.res_x))
        mask = np.reshape(depth_mask, (self.res_y, self.res_x)).astype(bool)
        bbox_final = bbox.get()
        return img, mask, bbox_final.astype(int)

开发者ID:kirumang，项目名称:Pix2Pose，代码行数:25，代码来源:gpu_render.py

示例2: coulomb_energy

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def coulomb_energy(f, param):
    """
    Compute the Coulomb energy of a region.

    Arguments
    ----------
    f    : class, region in the field array for which the Coulomb energy is
                  wanted; its ``q``, ``xq`` and ``E`` attributes are read.
    param: class, parameters related to the surface; its ``REAL``, ``qe``,
                  ``Na`` and ``E_0`` attributes are read.

    Returns
    --------
    E_coul: float, Coulomb energy.
    """

    charges = f.q
    positions = f.xq

    # Per-charge accumulator; coulomb_direct's return value is ignored, so it is
    # presumably filled in place - verify against coulomb_direct.
    per_charge = numpy.zeros(len(charges), param.REAL)
    coulomb_direct(positions[:, 0], positions[:, 1], positions[:, 2],
                   charges, per_charge)

    # Unit-conversion prefactor, evaluated in the same order as before.
    cal2J = 4.184
    numerator = param.qe**2 * param.Na * 1e-3 * 1e10
    C0 = numerator / (cal2J * param.E_0)

    summed = numpy.sum(per_charge)
    return summed * 0.5 * C0 / (4 * pi * f.E)

开发者ID:pygbe，项目名称:pygbe，代码行数:25，代码来源:matrixfree.py

示例3: zeros_cuda

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def zeros_cuda(shape):
    """Allocate a zero-filled GPUArray of the requested shape.

    The array is created through ``cuda_array.zeros`` with ``dtype=float32``.

    Parameters
    ----------
    shape : tuple
        Dimensions of the GPUArray.

    Returns
    -------
    gpuarray
        GPUArray of zeros.

    Examples
    --------
    >>> a = zeros_cuda((3, 2))
    >>> type(a)
    <class 'pycuda.gpuarray.GPUArray'>
    """
    zero_filled = cuda_array.zeros(shape, dtype=float32)
    return zero_filled

开发者ID:compas-dev，项目名称:compas，代码行数:26，代码来源:cuda.py

示例4: init_buffers

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def init_buffers(self):
        """Allocate this device's output/scratch buffers and exchange IPC handles with peers.

        Zero-filled output and scratch arrays are created with ``gpuarray.zeros``,
        their ``gpudata`` is recorded under ``self.device_id``, and IPC handles for
        both buffers and for ``self.event`` are exchanged through ``self.comm.bcast``.
        For every other device id the received handles are opened and stored in
        ``self.output_buff_dict``, ``self.scratch_buff_dict`` and
        ``self.event_buff_dict`` under the peer's id.
        """
        # Shape and dtype come from the tensor description of the op's first argument.
        shape = self.op.args[0].tensor_description().shape
        dtype = self.op.args[0].tensor_description().dtype

        # NOTE(review): the device count is taken from self.op.device_ids while the
        # loop below iterates self.device_ids - presumably the same list; confirm.
        n_devs = len(self.op.device_ids)
        size = self.op.args[0].tensor_description().axes.size
        segment_size = calculate_segment_size(size, n_devs)

        # Allocate output and scratch buffers; the scratch buffer holds one
        # segment per device.
        self.output_buff = gpuarray.zeros(shape, dtype)
        self.scratch_buff = gpuarray.zeros(segment_size * n_devs, dtype)

        # Register this device's own buffers under its own id.
        self.output_buff_dict[self.device_id] = self.output_buff.gpudata
        self.scratch_buff_dict[self.device_id] = self.scratch_buff.gpudata

        # Obtain IPC handles for the two buffers and the event
        output_ipc_hdl = drv.mem_get_ipc_handle(self.output_buff.gpudata)
        scratch_ipc_hdl = drv.mem_get_ipc_handle(self.scratch_buff.gpudata)
        event_ipc_hdl = self.event.ipc_handle()

        # Broadcast handles to others.  msg is built before the loop, so it keeps
        # this device's own handles even though the *_ipc_hdl names are rebound
        # to peer handles inside the loop.
        msg = (self.device_id, output_ipc_hdl, scratch_ipc_hdl, event_ipc_hdl)
        for i in self.device_ids:
            if i == self.device_id:
                # This device's turn to send.
                # NOTE(review): the device id is passed as the bcast root -
                # presumably device ids coincide with communicator ranks; confirm.
                self.comm.bcast(msg, root=i)
            else:
                # Receive the handles published by device i.
                (peer_id,
                 output_ipc_hdl,
                 scratch_ipc_hdl,
                 event_ipc_hdl) = self.comm.bcast(None, root=i)

                # Open the peer's handles and index them by the id the peer sent.
                output_hdl = drv.IPCMemoryHandle(output_ipc_hdl)
                scratch_hdl = drv.IPCMemoryHandle(scratch_ipc_hdl)
                event_hdl = drv.Event.from_ipc_handle(event_ipc_hdl)
                self.output_buff_dict[peer_id] = output_hdl
                self.scratch_buff_dict[peer_id] = scratch_hdl
                self.event_buff_dict[peer_id] = event_hdl

开发者ID:NervanaSystems，项目名称:ngraph-python，代码行数:39，代码来源:tensor_ops.py

示例5: zeros

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def zeros(self, *args, **kw):
        """Return ``np.zeros(*args, **kw)`` with ``dtype`` forced to ``self.floattype``.

        Any ``dtype`` keyword supplied by the caller is overridden.
        """
        forced = dict(kw, dtype=self.floattype)
        return np.zeros(*args, **forced)

开发者ID:comp-imaging，项目名称:ProxImaL，代码行数:5，代码来源:cuda_codegen.py

示例6: predict

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def predict(self, x, stream=None):
        """Run a forward pass through ``self.network`` and return the last buffer's contents.

        Parameters
        ----------
        x : array-like
            Input values.  If it has fewer elements than ``self.network_mem[0]``
            it is zero-padded to that buffer's size.
        stream : optional
            Stream passed to every ``set_async`` / ``eval_`` / ``get_async`` call;
            defaults to ``self.stream``.

        Returns
        -------
        array
            Result of ``self.network_mem[-1].get_async``; when it is 2-D only
            the first ``batch_size`` rows are returned.

        Raises
        ------
        Exception
            If ``x`` has more elements than the input buffer.
        """
        if stream is None:
            stream = self.stream

        # Always hand the input buffer a float32 ndarray.  The padding path below
        # already casts to float32, so the direct path must agree; a float32
        # ndarray passes through without a copy.
        x = np.asarray(x, dtype=np.float32)

        input_mem = self.network_mem[0]
        if x.size == input_mem.size:
            input_mem.set_async(x, stream=stream)
        else:
            if x.size > input_mem.size:
                raise Exception("Error: batch size too large for input.")

            # Zero-pad a short batch to the full buffer size.
            padded = np.zeros((input_mem.size,), dtype=np.float32)
            padded[0:x.size] = x.ravel()
            input_mem.set_async(padded.reshape(input_mem.shape), stream=stream)

        # A 2-D input is treated as (batch, features); anything else is one sample.
        batch_size = x.shape[0] if len(x.shape) == 2 else 1

        # Layer i reads buffer i and writes buffer i + 1.  ``range``/``enumerate``
        # replace ``xrange``, which does not exist on Python 3.
        for i, layer in enumerate(self.network):
            layer.eval_(x=self.network_mem[i], y=self.network_mem[i + 1],
                        batch_size=batch_size, stream=stream)

        # NOTE(review): get_async is asynchronous by name; presumably the caller
        # must synchronize the stream before reading the result - confirm.
        y = self.network_mem[-1].get_async(stream=stream)

        if len(y.shape) == 2:
            # Drop the padding rows.
            y = y[0:batch_size, :]

        return y

开发者ID:PacktPublishing，项目名称:Hands-On-GPU-Programming-with-Python-and-CUDA，代码行数:36，代码来源:deep_neural_network.py

示例7: nppiFilter

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def nppiFilter(src, kernel, roi = None, dst = None):
    """Filter a float32 image with a float32 kernel through the NPP ``nppiFilter_32f_C*R`` calls.

    Parameters
    ----------
    src : GPU array
        Source image, 2-D or 3-D with 1 to 4 channels on the last axis.
    kernel : GPU array
        Filter kernel, 2-D or 3-D with a trailing axis of length 1.
    roi : sequence of 4 ints, optional
        ``[x, y, width, height]``; defaults to the whole source image.
    dst : GPU array, optional
        float32 destination; allocated with ``gpuarray.zeros`` when omitted.
        NOTE(review): its shape is not checked against ``src`` - presumably the
        caller guarantees a matching layout.

    Returns
    -------
    The destination array.

    Raises
    ------
    RuntimeError
        For unsupported dimensions, dtypes or channel counts, a non-float32
        destination, or a negative NPP status code.
    """
    src_ndim = len(src.shape)
    if src_ndim < 2 or src_ndim > 3 or (src_ndim == 3 and src.shape[-1] > 4):
        raise RuntimeError("Only 2D convolution supported")
    kernel_ndim = len(kernel.shape)
    if kernel_ndim < 2 or kernel_ndim > 3 or (kernel_ndim == 3 and kernel.shape[-1] != 1):
        raise RuntimeError("Only 2D convolution supported")

    if roi is None:
        # x,y,width,height
        roi = [0, 0, src.shape[1], src.shape[0]]

    # Only the float32 image / float32 kernel combination is implemented.
    if src.dtype != np.float32 or kernel.dtype != np.float32:
        raise RuntimeError("Not supported")

    if dst is None:
        dst = gpuarray.zeros(src.shape, src.dtype)
    # src is float32 at this point, so one comparison covers both former checks.
    if dst.dtype != np.float32:
        raise RuntimeError("Unsupported destination image.")

    # Pick the NPP entry point from the channel count.  Anything outside 1..4
    # (in practice a zero-length channel axis) is rejected here instead of
    # leaving the function pointer unbound.
    channels = 1 if src_ndim == 2 else src.shape[-1]
    if channels == 1:
        nppfunc = nppi.nppiFilter_32f_C1R
    elif channels == 2:
        nppfunc = nppi.nppiFilter_32f_C2R
    elif channels == 3:
        nppfunc = nppi.nppiFilter_32f_C3R
    elif channels == 4:
        nppfunc = nppi.nppiFilter_32f_C4R
    else:
        raise RuntimeError("Not supported")

    es = 4  # bytes per float32 element
    pixel_bytes = channels * es

    # Move both base pointers to the first pixel of the ROI; steps are row
    # pitches in bytes.
    srcp = int(src.gpudata) + (roi[0] + roi[1]*src.shape[1])*pixel_bytes
    dstp = int(dst.gpudata) + (roi[0] + roi[1]*dst.shape[1])*pixel_bytes
    src_step = src.shape[1]*pixel_bytes
    dst_step = dst.shape[1]*pixel_bytes

    oSizeROI = NppiSize()
    oSizeROI.width = roi[2]
    oSizeROI.height = roi[3]

    kernelp = int(kernel.gpudata)
    kernelSize = NppiSize()
    kernelSize.width = kernel.shape[1]
    kernelSize.height = kernel.shape[0]

    # Anchor the kernel at its centre.
    anchor = NppiPoint()
    anchor.x = kernel.shape[1]//2
    anchor.y = kernel.shape[0]//2

    status = nppfunc(cast(srcp, POINTER(c_float)), src_step,
                     cast(dstp, POINTER(c_float)), dst_step,
                     oSizeROI, cast(kernelp, POINTER(c_float)), kernelSize, anchor)
    if status < 0:
        raise RuntimeError("Npp library returned %d" % status)
    return dst

开发者ID:comp-imaging，项目名称:ProxImaL，代码行数:62，代码来源:cuda_npp.py

示例8: __init__

# 需要导入模块: from pycuda import gpuarray [as 别名]
# 或者: from pycuda.gpuarray import zeros [as 别名]
def __init__(self, num_inputs=None, num_outputs=None, weights=None, b=None, stream=None,
    relu=False, sigmoid=False, delta=None):
        """Set up the layer's weights, bias, activation flags and launch geometry.

        Parameters
        ----------
        num_inputs, num_outputs : int, optional
            Layer dimensions.  If either is omitted, both are taken from
            ``weights.shape`` as ``(num_outputs, num_inputs)``.
        weights : array-like or GPUArray, optional
            Weight matrix.  When omitted, a ``num_outputs`` x ``num_inputs``
            matrix of uniform random values in [-0.5, 0.5) is generated.
        b : array-like or GPUArray, optional
            Bias vector; defaults to zeros of length ``num_outputs``.
        stream : optional
            Stream used for the asynchronous host-to-device uploads.
        relu, sigmoid : bool
            Activation flags, stored as ``np.int32``.
        delta : float, optional
            Stored as ``np.float32``; defaults to 0.001.
        """
        self.stream = stream
        self.delta = np.float32(0.001 if delta is None else delta)

        if weights is None:
            weights = np.random.rand(num_outputs, num_inputs) - .5

        # Host data is uploaded as float32; an existing GPUArray is used as is.
        if isinstance(weights, pycuda.gpuarray.GPUArray):
            self.weights = weights
        else:
            self.weights = gpuarray.to_gpu_async(np.array(weights, dtype=np.float32), stream=self.stream)

        if num_inputs is None or num_outputs is None:
            # Infer the dimensions from the weight matrix.
            self.num_inputs = np.int32(self.weights.shape[1])
            self.num_outputs = np.int32(self.weights.shape[0])
        else:
            self.num_inputs = np.int32(num_inputs)
            self.num_outputs = np.int32(num_outputs)

        if b is None:
            b = gpuarray.zeros((self.num_outputs,), dtype=np.float32)

        if isinstance(b, pycuda.gpuarray.GPUArray):
            self.b = b
        else:
            self.b = gpuarray.to_gpu_async(np.array(b, dtype=np.float32), stream=self.stream)

        self.relu = np.int32(relu)
        self.sigmoid = np.int32(sigmoid)

        self.block = (32, 1, 1)

        # Ceil-divide with integer arithmetic so the grid covers every output
        # even when ``/`` would perform floor division on integers.
        threads_per_block = self.block[0]
        n_blocks = (int(self.num_outputs) + threads_per_block - 1) // threads_per_block
        self.grid = (n_blocks, 1, 1)

开发者ID:PacktPublishing，项目名称:Hands-On-GPU-Programming-with-Python-and-CUDA，代码行数:47，代码来源:deep_neural_network.py

注：本文中的pycuda.gpuarray.zeros方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。