Python gpuarray.empty函数代码示例

本文整理汇总了Python中pycuda.gpuarray.empty函数的典型用法代码示例。如果您正苦于以下问题：Python empty函数的具体用法？Python empty怎么用？Python empty使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了empty函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _minmax_impl

def _minmax_impl(a_gpu, axis, min_or_max, stream=None):
    """Return both the extremum and its index (max/argmax or min/argmin) along an axis.

    Parameters
    ----------
    a_gpu : GPU array
        Input with fewer than three dimensions (asserted). The axis-wise path
        unpacks ``a_gpu.shape`` into two values, so it needs a 2-D input.
        Complex dtypes are rejected.
    axis : int or None
        ``0`` or ``1`` (negative values are shifted by 2). ``None`` reduces
        over the whole array.
    min_or_max : str
        ``'max'`` selects the maximum; in the ``axis is None`` path any other
        value selects the minimum. The value is also forwarded to
        ``_get_minmax_kernel``.
    stream : optional
        Forwarded unchanged to the kernel launch.

    Returns
    -------
    When ``axis`` is ``None``: the host-side result of ``gpuarray.max`` /
    ``gpuarray.min`` (value only -- no index is returned in this case).
    Otherwise: a ``(target, idx)`` tuple of 1-D GPU arrays, ``target`` having
    the dtype of ``a_gpu`` and ``idx`` being ``uint32``.

    Raises
    ------
    ValueError
        If ``a_gpu`` has a complex dtype.
    """
    assert len(a_gpu.shape) < 3
    if iscomplextype(a_gpu.dtype):
        raise ValueError("Cannot compute min/max of complex values")

    if axis is None:  # Note: PyCUDA doesn't have an overall argmax/argmin!
        reduce_all = gpuarray.max if min_or_max == 'max' else gpuarray.min
        return reduce_all(a_gpu).get()

    if axis < 0:
        axis += 2
    assert axis in (0, 1)

    alloc = _global_cublas_allocator

    # For non-C-contiguous input the two extents are swapped before use.
    n, m = a_gpu.shape if a_gpu.flags.c_contiguous else (a_gpu.shape[1], a_gpu.shape[0])
    col_kernel, row_kernel = _get_minmax_kernel(a_gpu.dtype, min_or_max)

    use_col_kernel = ((axis == 0 and a_gpu.flags.c_contiguous) or
                      (axis == 1 and a_gpu.flags.f_contiguous))
    if use_col_kernel:
        kernel, out_len = col_kernel, m
    else:
        kernel, out_len = row_kernel, n

    # Both outputs have one entry per launched block. The dtype is passed
    # explicitly as ``a_gpu.dtype``; the row branch used to pass the array
    # itself and relied on NumPy coercing objects that expose ``.dtype``.
    target = gpuarray.empty(out_len, dtype=a_gpu.dtype, allocator=alloc)
    idx = gpuarray.empty(out_len, dtype=np.uint32, allocator=alloc)
    kernel(a_gpu, target, idx, np.uint32(m), np.uint32(n),
           block=(32, 1, 1), grid=(out_len, 1, 1), stream=stream)
    return target, idx

开发者ID:oursland，项目名称:scikits.cuda，代码行数:32，代码来源:misc.py

示例2: generate_shifts_2d

def generate_shifts_2d(width, height, n_samples, with_hot=False):
    """Draw ``n_samples`` random 2-D shifts on the GPU.

    Each coordinate is a uniform sample scaled by ``width - 0.01`` (or
    ``height - 0.01``) and cast to ``uint32``.
    NOTE(review): presumably ``gen_uniform`` samples from [0, 1), which would
    keep the shifts inside ``0 .. width-1`` / ``0 .. height-1`` -- confirm.

    Parameters
    ----------
    width, height : numeric
        Scale factors for the x and y shifts.
    n_samples : int
        Number of shifts to generate.
    with_hot : bool
        If true, the third return value is a ``(width * height, n_samples)``
        float32 array filled by ``gpu_shift_to_hot_2d``; otherwise it is a
        ``(2, n_samples)`` float32 array filled by ``gpu_vstack`` from
        ``y_shifts`` and ``x_shifts`` (in that argument order).

    Returns
    -------
    tuple
        ``(x_shifts, y_shifts, third)`` with ``third`` as described above.
    """
    x_shifts = gpu_rng.gen_uniform((n_samples,), np.float32) * (width - 0.01)
    x_shifts = x_shifts.astype(np.uint32)

    y_shifts = gpu_rng.gen_uniform((n_samples,), np.float32) * (height - 0.01)
    y_shifts = y_shifts.astype(np.uint32)

    threads_per_block = 32
    # Integer ceiling division: gives the same block count whether ``/`` is
    # true division (Python 3) or floor division (Python 2), where
    # ``ceil(n / 32)`` silently dropped the last partial block.
    n_blocks = (n_samples + threads_per_block - 1) // threads_per_block
    launch = dict(block=(threads_per_block, 1, 1), grid=(n_blocks, 1))

    if with_hot:
        shifts_hot = gp.empty((width * height, n_samples), np.float32)
        # Strides are in bytes; dividing by 4 converts them to float32 elements.
        gpu_shift_to_hot_2d(x_shifts, y_shifts, shifts_hot,
                            np.uint32(shifts_hot.strides[0] // 4),
                            np.uint32(shifts_hot.strides[1] // 4),
                            np.uint32(width), np.uint32(height), np.uint32(n_samples),
                            **launch)
        return x_shifts, y_shifts, shifts_hot

    shifts = gp.empty((2, n_samples), np.float32)
    gpu_vstack(y_shifts, x_shifts, shifts,
               np.uint32(shifts.strides[0] // 4), np.uint32(shifts.strides[1] // 4),
               np.uint32(n_samples),
               **launch)
    return x_shifts, y_shifts, shifts

开发者ID:surban，项目名称:ml，代码行数:26，代码来源:shift_gpu.py

示例3: sample_dropout_mask

def sample_dropout_mask(x, dropout_probability=.5, columns=None, stream=None, target=None,
                        dropout_mask=None, dropout_prob_array=None):
    """Sample a dropout mask and apply it through the ``sample_dropout_mask`` kernel.

    Parameters
    ----------
    x : GPU array
        Input; must be C-contiguous (asserted).
    dropout_probability : float
        Passed to the kernel as ``float32``.
    columns : sequence of length 2 or None
        When given, the kernel works on ``extract_columns(x, columns[0],
        columns[1])`` and the result is written back into the original array
        with ``insert_columns`` at ``columns[0]``.
    stream : optional
        Only forwarded to ``sampler.fill_uniform``.
    target : GPU array or None
        Kernel output; defaults to the (possibly column-extracted) input.
    dropout_mask : GPU array or None
        Mask buffer; an ``int8`` array of the working shape is allocated
        when omitted.
    dropout_prob_array : GPU array or None
        Buffer for the uniform samples; allocated with the working shape and
        dtype when omitted. It is always refilled.

    Returns
    -------
    The dropout mask array.
    """

    assert x.flags.c_contiguous

    restrict_to_columns = columns is not None
    if restrict_to_columns:
        assert len(columns) == 2
        x_tmp = x
        x = extract_columns(x, columns[0], columns[1])

    work_shape = x.shape

    # Fresh uniform samples are drawn on every call, even into a caller buffer.
    if dropout_prob_array is None:
        dropout_prob_array = gpuarray.empty(work_shape, x.dtype)
    sampler.fill_uniform(dropout_prob_array, stream)

    if dropout_mask is None:
        dropout_mask = gpuarray.empty(work_shape, np.int8)

    if target is None:
        target = x

    kernel = all_kernels['sample_dropout_mask']
    kernel(x, target, dropout_mask, dropout_prob_array,
           np.float32(dropout_probability))

    if restrict_to_columns:
        insert_columns(x, x_tmp, columns[0])

    return dropout_mask

开发者ID:Snazz2001，项目名称:hebel，代码行数:30，代码来源:elementwise.py

示例4: get

    def get(self, V_gpu, xcl_gpu, xcr_gpu, W_gpu, x_gpu, stream=None):
        """Launch the three kernels held in ``self._func`` one after another.

        Two temporaries are allocated: ``z_gpu`` with shape
        ``(params['V_d'], params['V_w'])`` and ``xc2_gpu`` with
        ``2 * params['w_d']`` elements, both of dtype ``params['dtype']``.
        Kernel 0 receives ``xcl_gpu``, ``xcr_gpu`` and ``xc2_gpu``; kernel 1
        receives ``V_gpu``, ``xc2_gpu`` and ``z_gpu``; kernel 2 receives
        ``W_gpu``, ``xc2_gpu``, ``z_gpu`` and ``x_gpu``.
        NOTE(review): which arguments each kernel reads or writes is not
        visible here -- presumably ``x_gpu`` is the final output; confirm.

        All launches go to ``stream``; a new ``cuda.Stream`` is created when
        none is supplied. Nothing is returned.
        """
        if stream is None:
            stream = cuda.Stream()

        cfg = self.params
        elem_type = cfg['dtype']

        # Temporary variables
        z_gpu = gpuarray.empty((cfg['V_d'], cfg['V_w']), elem_type)
        xc2_gpu = gpuarray.empty(2*cfg['w_d'], elem_type)

        self._func[0](xcl_gpu, xcr_gpu, xc2_gpu,
                      block=(cfg['V_w'], 1, 1),
                      stream=stream)

        self._func[1](V_gpu, xc2_gpu, z_gpu,
                      block=(1, cfg['V_w'], 1),
                      grid=(1, 1, cfg['V_d']),
                      stream=stream)

        self._func[2](W_gpu, xc2_gpu, z_gpu, x_gpu,
                      block=(1, cfg['W_h'], 1),
                      grid=(1, 1, 1),
                      stream=stream)

开发者ID:jolinxql，项目名称:RNTN，代码行数:31，代码来源:RNTN_cuda.py

示例5: enable3d

    def enable3d(self):
        """Prepare the state used for 3-D display and set ``display3d``.

        Two viewpoints are placed at ``point -/+ (mesh_diagonal_norm / 60) *
        axis2``; a ray bundle is built for each of them and for a smaller
        "scope" film (a quarter of ``size[0]`` on both sides, a quarter of the
        film width) centred on ``point``. GPU buffers are then allocated for
        the scope pixels, the two full-size pixel images and the scope
        distances.
        """
        half_offset = (self.mesh_diagonal_norm/60)*self.axis2
        self.point1 = self.point-half_offset
        self.point2 = self.point+half_offset

        self.viewing_angle = 0.0

        # Both viewpoints share the same film geometry.
        film = dict(axis1=self.axis1, axis2=self.axis2,
                    size=self.size, width=self.film_width)
        pos1, dir1 = from_film(self.point1, **film)
        pos2, dir2 = from_film(self.point2, **film)

        self.rays1 = gpu.GPURays(pos1, dir1,
                                 max_alpha_depth=self.max_alpha_depth)
        self.rays2 = gpu.GPURays(pos2, dir2,
                                 max_alpha_depth=self.max_alpha_depth)

        scope_side = self.size[0]//4
        scope_pos, scope_dir = from_film(self.point, axis1=self.axis1,
                                         axis2=self.axis2,
                                         size=(scope_side, scope_side),
                                         width=self.film_width/4.0)
        self.scope_rays = gpu.GPURays(scope_pos, scope_dir)

        n_scope = self.scope_rays.pos.size
        n_pixels = self.width*self.height

        self.scope_pixels_gpu = ga.empty(n_scope, dtype=np.uint32)

        self.pixels1_gpu = ga.empty(n_pixels, dtype=np.uint32)
        self.pixels2_gpu = ga.empty(n_pixels, dtype=np.uint32)

        self.distances_gpu = ga.empty(n_scope, dtype=np.float32)
        self.display3d = True

开发者ID:BenLand100，项目名称:chroma，代码行数:32，代码来源:camera.py

示例6: __init__

    def __init__(self, res=(640, 480)):
        """Compile the tracing kernels and allocate the device buffers.

        Parameters
        ----------
        res : tuple of int
            Requested ``(width, height)``. The effective resolution
            (``self.resx``, ``self.resy``) is rounded down to a whole number
            of ``self.block`` tiles.

        Side effects: reads ``cpp/trace.cu`` relative to the current working
        directory, compiles it, allocates the image and ray buffers, and
        calls ``self.setLightPos``.
        """
        # ``open`` inside ``with`` closes the handle and works on Python 2 and
        # 3 alike (the ``file`` builtin exists only on Python 2).
        with open("cpp/trace.cu") as source_file:
            kernel_source = source_file.read()
        mod = cuda.SourceModule(kernel_source, keep=True, options=['-I../cpp'], no_extern_c=True)
        self.InitEyeRays = mod.get_function("InitEyeRays")
        self.InitFishEyeRays = mod.get_function("InitFishEyeRays")
        self.Trace = mod.get_function("Trace")
        self.ShadeSimple = mod.get_function("ShadeSimple")
        self.mod = mod

        # NOTE(review): the trailing numbers are an original tuning note whose
        # meaning is not evident from this code.
        self.block = (16, 32, 1)  # 15: 32, 18: 28, 19: 24
        # Floor division keeps the grid sizes integral under true division too.
        self.grid = (res[0]//self.block[0], res[1]//self.block[1])
        self.resx, self.resy = (self.grid[0]*self.block[0], self.grid[1]*self.block[1])

        self.smallblock = (16, 16, 1)
        self.smallgrid = (res[0]//self.smallblock[0], res[1]//self.smallblock[1])

        # Four bytes per pixel.
        self.d_img = ga.empty((self.resy, self.resx, 4), uint8)

        # Per-pixel ray record, stored as raw bytes. The format string below
        # mirrors this device-side layout:
        #
        #   struct RayData
        #   {
        #     float3 dir;
        #     float t;
        #     VoxNodeId endNode;
        #     int endNodeChild;
        #     float endNodeSize;
        #   };
        raySize = struct.calcsize("3f f i i f")
        self.d_rays = ga.empty((self.resy, self.resx, raySize), uint8)

        self.setLightPos((0.5, 0.5, 1))
        self.detailCoef = 10.0

开发者ID:znah，项目名称:yoxel-voxel，代码行数:32，代码来源:trace_cuda.py

示例7: test_cublas_bug

def test_cublas_bug():
    '''
    Reproduce a CUDA bug triggered by an SGEMM call on a row-sliced array.

    The SGEMM call would cause all calls after it to fail for some unknown
    reason. Likely this is caused by swaprows causing memory corruption.

    NOTE: this was confirmed by nvidia to be a bug within CUDA, and should be
          fixed in CUDA 6.5

    The test contains no assertions; it passes when the final stream
    synchronization does not raise.
    '''
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    # Row slice selecting the last three rows (128..130) of an n-row array.
    s = slice(128, n)
    # X only provides shapes: `a`, `c` and `b` below are uninitialized buffers.
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    # GEMM dimensions; note that `n` is rebound here from the row count (131)
    # to the column count of the slice.
    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    #print handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    # A follow-up CUDA call: per the docstring, calls after the SGEMM are the
    # ones that used to fail.
    stream = Stream()
    stream.synchronize()

开发者ID:stachon，项目名称:binet，代码行数:34，代码来源:test_op.py

示例8: test_cublasSgetriBatched

    def test_cublasSgetriBatched(self):
        """Compare cuBLAS batched inversion (getrf + getri) with NumPy.

        A batch of ``l`` random ``m x m`` float32 matrices is factorized with
        ``cublasSgetrfBatched`` and inverted with ``cublasSgetriBatched``;
        the result is compared against ``np.linalg.inv`` of each matrix.
        """
        l, m = 11, 7
        np.random.seed(1)
        A = np.random.rand(l, m, m).astype(np.float32)

        a_gpu = gpuarray.to_gpu(A)
        a_arr = bptrs(a_gpu)
        c_gpu = gpuarray.empty((l, m, m), np.float32)
        c_arr = bptrs(c_gpu)

        # Pivot and status buffers for the batched calls.
        p_gpu = gpuarray.empty((l, m), np.int32)
        i_gpu = gpuarray.zeros(l, np.int32)

        cublas.cublasSgetrfBatched(self.cublas_handle,
                    m, a_arr.gpudata, m, p_gpu.gpudata,
                    i_gpu.gpudata, l)

        cublas.cublasSgetriBatched(self.cublas_handle,
                    m, a_arr.gpudata, m, p_gpu.gpudata, c_arr.gpudata, m,
                    i_gpu.gpudata, l)

        # Build the reference with a list comprehension: on Python 3 ``map``
        # returns an iterator, which ``np.array`` would wrap as a 0-d object
        # array instead of stacking the inverses.
        X = np.array([np.linalg.inv(mat) for mat in A])
        X_ = c_gpu.get()

        # NOTE(review): the third positional argument of np.allclose is
        # ``rtol``, so the original ``np.allclose(X, X_, 6)`` is a relative
        # tolerance of 6 (600%) -- probably meant as "6 decimals". The value
        # is kept to preserve the test's current outcome; tighten it once
        # checked on hardware.
        assert np.allclose(X, X_, rtol=6)

开发者ID:teodor-moldovan，项目名称:scikits.cuda，代码行数:25，代码来源:test_cublas.py

示例9: _create_halo_arrays

    def _create_halo_arrays(self):
        """Allocate the halo buffers: one send and one receive array per face.

        With ``(nz, ny, nx) = self.local_dims`` and ``sw =
        self.stencil_width``, the left/right halos have shape
        ``[nz, ny, sw]``, the bottom/top halos ``[nz, sw, nx]`` and the
        back/front halos ``[sw, ny, nx]``. All twelve arrays are float64 and
        left uninitialized.
        """
        nz, ny, nx = self.local_dims
        sw = self.stencil_width

        def new_halo(shape):
            # Allocate each buffer directly. Copying an uninitialized array
            # (as ``.copy()`` did) only adds a device-to-device transfer of
            # garbage.
            return gpuarray.empty(shape, dtype=np.float64)

        x_face = [nz, ny, sw]
        self.left_recv_halo = new_halo(x_face)
        self.left_send_halo = new_halo(x_face)
        self.right_recv_halo = new_halo(x_face)
        self.right_send_halo = new_halo(x_face)

        y_face = [nz, sw, nx]
        self.bottom_recv_halo = new_halo(y_face)
        self.bottom_send_halo = new_halo(y_face)
        self.top_recv_halo = new_halo(y_face)
        self.top_send_halo = new_halo(y_face)

        z_face = [sw, ny, nx]
        self.back_recv_halo = new_halo(z_face)
        self.back_send_halo = new_halo(z_face)
        self.front_recv_halo = new_halo(z_face)
        self.front_send_halo = new_halo(z_face)

开发者ID:shwina，项目名称:gpuDA，代码行数:25，代码来源:gpuda.py

示例10: gradient_gpu

def gradient_gpu(y_gpu, mode='valid'):
  """Compute the x and y gradients of a 2-D GPU array.

  Parameters
  ----------
  y_gpu : GPU array
      Input; its first two shape entries are used as (rows, columns).
  mode : {'valid', 'same'}
      ``'valid'`` runs the ``gradient_valid`` kernel and returns outputs that
      are one element shorter along the differentiated axis:
      ``(rows, cols-1)`` for x and ``(rows-1, cols)`` for y.
      ``'same'`` runs ``gradient_same`` with both outputs shaped like the
      input.

  Returns
  -------
  tuple
      ``(gradx_gpu, grady_gpu)``.

  Raises
  ------
  ValueError
      If ``mode`` is not one of the supported values.
  """
  # Validate first: an unknown mode used to fall through both branches and
  # fail with an unbound-local error only after the kernel had been compiled.
  if mode not in ('valid', 'same'):
    raise ValueError("mode must be 'valid' or 'same', got %r" % (mode,))

  shape = np.array(y_gpu.shape).astype(np.uint32)
  dtype = y_gpu.dtype
  block_size = (16,16,1)
  # One thread per element: columns map to grid x, rows to grid y.
  grid_size = (int(np.ceil(float(shape[1])/block_size[0])),
               int(np.ceil(float(shape[0])/block_size[1])))
  # Dynamic shared memory for a (block + 1) x (block + 1) tile of elements.
  shared_size = int((1+block_size[0])*(1+block_size[1])*dtype.itemsize)

  preproc = _generate_preproc(dtype, shape)
  mod = SourceModule(preproc + kernel_code, keep=True)

  rows, cols = y_gpu.shape[0], y_gpu.shape[1]
  if mode == 'valid':
    kernel = mod.get_function("gradient_valid")
    gradx_gpu = cua.empty((rows, cols-1), dtype)
    grady_gpu = cua.empty((rows-1, cols), dtype)
  else:
    kernel = mod.get_function("gradient_same")
    gradx_gpu = cua.empty((rows, cols), dtype)
    grady_gpu = cua.empty((rows, cols), dtype)

  kernel(gradx_gpu.gpudata, grady_gpu.gpudata, y_gpu.gpudata,
         block=block_size, grid=grid_size, shared=shared_size)

  return (gradx_gpu, grady_gpu)

开发者ID:matthiaslee，项目名称:VMBD，代码行数:28，代码来源:gputools.py

示例11: initializeGpuMemory

 def initializeGpuMemory(self):
     """
     Allocate the GPU buffers of the impulse model.

     Creates two ``(K, K)`` arrays -- ``nnz_Z`` (int32) and ``g_suff_stats``
     (float32) -- with ``K`` read from the ``proc_id_model`` parameters, and
     a ``GS`` buffer shaped like ``self.base.dSS["dS"]``.
     """
     n_proc = self.modelParams["proc_id_model","K"]
     pair_shape = (n_proc, n_proc)

     # Sufficient statistics for the parameters of G kernels
     self.gpuPtrs["impulse_model","nnz_Z"] = gpuarray.empty(pair_shape, dtype=np.int32)
     self.gpuPtrs["impulse_model","g_suff_stats"] = gpuarray.empty(pair_shape, dtype=np.float32)
     self.gpuPtrs["impulse_model","GS"] = gpuarray.empty_like(self.base.dSS["dS"])

开发者ID:richardkwo，项目名称:pyhawkes，代码行数:7，代码来源:impulse_models.py

示例12: _init_comm_bufs

    def _init_comm_bufs(self):
        """
        Allocate the buffers used to exchange data with other modules.

        For every source id in `_in_ids` and every destination id in
        `_out_ids`, and for both port types ('gpot' and 'spike'), three
        entries are created: the GPU buffer itself (`_in_buf` / `_out_buf`),
        its `bufint` wrapper (`_in_buf_int` / `_out_buf_int`) and the result
        of `dtype_to_mpi` for its dtype (`_in_buf_mtype` / `_out_buf_mtype`).
        Each buffer has one element per entry of the matching
        `_in_port_dict_ids` / `_out_port_dict_ids` list and the dtype of
        `self.pm[<port type>]`.

        Notes
        -----
        Must be executed after `_init_port_dicts()`.
        """

        port_types = ('gpot', 'spike')

        # Buffers (and their interfaces and MPI types) for receiving data
        # transmitted from source modules:
        self._in_buf = {t: {} for t in port_types}
        self._in_buf_int = {t: {} for t in port_types}
        self._in_buf_mtype = {t: {} for t in port_types}
        for in_id in self._in_ids:
            for t in port_types:
                buf = gpuarray.empty(len(self._in_port_dict_ids[t][in_id]),
                                     self.pm[t].dtype)
                self._in_buf[t][in_id] = buf
                self._in_buf_int[t][in_id] = bufint(buf)
                self._in_buf_mtype[t][in_id] = dtype_to_mpi(buf.dtype)

        # Buffers (and their interfaces and MPI types) for transmitting data to
        # destination modules:
        self._out_buf = {t: {} for t in port_types}
        self._out_buf_int = {t: {} for t in port_types}
        self._out_buf_mtype = {t: {} for t in port_types}
        for out_id in self._out_ids:
            for t in port_types:
                buf = gpuarray.empty(len(self._out_port_dict_ids[t][out_id]),
                                     self.pm[t].dtype)
                self._out_buf[t][out_id] = buf
                self._out_buf_int[t][out_id] = bufint(buf)
                self._out_buf_mtype[t][out_id] = dtype_to_mpi(buf.dtype)

开发者ID:ScartleRoy，项目名称:neurokernel，代码行数:59，代码来源:core_gpu.py

示例13: getFields

 def getFields(self,x,y):
   """Evaluate every field on an ``x`` by ``y`` grid.

   Allocates two float32 arrays of shape ``(self.Nfields, x, y)`` and, for
   each field ``i``, runs ``self.resampleF[i]`` with slice ``i`` of both
   arrays plus the two sizes as ``int32``.

   Parameters: ``x`` and ``y`` are the positive integer sizes of the grid.
   Returns: the tuple ``(outX, outY)``.
   """
   outX = gpuarray.empty((self.Nfields,x,y),np.float32)
   outY = gpuarray.empty((self.Nfields,x,y),np.float32)
   # Integer ceiling division. ``ceil(x/32)`` depends on ``/`` being true
   # division; under Python 2 integer division it gave an undersized grid
   # (zero for sizes below 32, then a division by zero on the next line).
   grid = (int(-(-x // 32)), int(-(-y // 32)))
   # Spread each axis evenly over its blocks; this keeps every block
   # dimension at 32 or less.
   block = (int(-(-x // grid[0])), int(-(-y // grid[1])), 1)
   for i in range(self.Nfields):
     self.resampleF[i].prepared_call(grid,block,outX[i,:,:].gpudata,outY[i,:,:].gpudata,np.int32(x),np.int32(y))
   return outX,outY

开发者ID:LaboratoireMecaniqueLille，项目名称:GPGPU，代码行数:8，代码来源:iCorrel.py

示例14: show_values

def show_values(matrix_size, threads_per_block):
    """Run the ``markThreadID`` kernel and print the five arrays it fills.

    A random ``matrix_size x matrix_size`` float32 matrix is uploaded and
    passed to the kernel together with five output arrays of the same shape
    (group ids x/y, thread ids x/y and cell id). The launch uses 1-D blocks
    of ``threads_per_block`` threads; the grid has enough blocks along x to
    cover one row and ``matrix_size`` blocks along y. The launch
    configuration and each downloaded array are printed.
    """
    side = (matrix_size, matrix_size)
    a_cpu = np.random.randn(*side).astype(np.float32)

    # transfer host (CPU) memory to device (GPU) memory
    a_gpu = gpuarray.to_gpu(a_cpu)

    # Output buffers, in kernel argument order:
    # groups x, groups y, threads x, threads y, cell.
    outputs = [gpuarray.empty(side, np.float32) for _ in range(5)]

    blocks = (threads_per_block, 1, 1)

    # Round up so a partial block still covers the end of each row.
    blocks_per_side = int(matrix_size / threads_per_block)
    if blocks_per_side * threads_per_block < matrix_size:
        blocks_per_side += 1

    grid = (blocks_per_side, matrix_size, 1)

    print("Blocks: ", blocks)
    print("Grid: ", grid)

    substitutions = {'MATRIX_SIZE': matrix_size, 'BLOCK_SIZE': threads_per_block}
    kernel_code = kernel_source_code % substitutions

    kernel_binary = compiler.SourceModule(kernel_code).get_function("markThreadID")

    # a_gpu is the input; the remaining positional arguments are outputs.
    kernel_binary(a_gpu, *outputs, block=blocks, grid=grid)

    # Download everything first, then print label/value pairs.
    labels = ("id_blocks_x_cpu", "id_blocks_y_cpu",
              "id_threads_x_cpu", "id_threads_y_cpu", "id_cell_cpu")
    host_arrays = [device_array.get() for device_array in outputs]

    for label, values in zip(labels, host_arrays):
        print(label)
        print(values)

开发者ID:javierip，项目名称:parallel-code-examples，代码行数:58，代码来源:main.py

示例15: initializeGpuMemory

 def initializeGpuMemory(self):
     """
     Allocate GPU memory for the process-id model.

     Creates two int32 vectors in ``self.gpuPtrs``: ``C`` with
     ``self.base.data.N`` entries and ``Ns`` with ``self.base.data.K``
     entries. Both are left uninitialized.
     """
     data = self.base.data
     n_entries = data.N
     n_procs = data.K

     self.gpuPtrs["proc_id_model","C"] = gpuarray.empty((n_entries,), dtype=np.int32)
     self.gpuPtrs["proc_id_model","Ns"] = gpuarray.empty((n_procs,), dtype=np.int32)

开发者ID:richardkwo，项目名称:pyhawkes，代码行数:9，代码来源:process_id_models.py

注：本文中的pycuda.gpuarray.empty函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。