当前位置: 首页>>代码示例>>Python>>正文


Python driver.pagelocked_empty函数代码示例

本文整理汇总了Python中pycuda.driver.pagelocked_empty函数的典型用法代码示例。如果您正苦于以下问题:Python pagelocked_empty函数的具体用法?Python pagelocked_empty怎么用?Python pagelocked_empty使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了pagelocked_empty函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self,**params):

        '''
        Hack-ish way to avoid initialisation until the weights are transfered:
        '''
        should_apply = self.apply_output_fns_init
        params['apply_output_fns_init'] = False

        super(GPUSparseCFProjection,self).__init__(**params)
        # Transfering the weights:
        self.pycuda_stream = cuda.Stream()
        self.weights_gpu = cusparse.CSR.to_CSR(self.weights.toSparseArray().transpose())
        # Getting the row and columns indices for the *transposed* matrix. Used for Hebbian learning and normalisation:
        nzcols, nzrows = self.weights.nonzero()
        tups = sorted(zip(nzrows, nzcols))
        nzrows = [x[0] for x in tups]
        nzcols = [x[1] for x in tups]

        '''
        Allocating a page-locked piece of memory for the activity so that GPU could transfer data to the
        main memory without the involvment of the CPU:
        '''
        self.activity = cuda.pagelocked_empty(self.activity.shape, np.float32)
        self.activity_gpu_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)

        self.input_buffer_pagelocked = cuda.pagelocked_empty(shape=(self.weights_gpu.shape[1],), dtype=np.float32, mem_flags=cuda.host_alloc_flags.WRITECOMBINED)
        self.input_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[1], ), dtype=np.float32)

        self.norm_total_gpu = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)

        # Getting them on the GPU:
        self.nzcount = self.weights.getnnz()
        self.nzrows_gpu = gpuarray.to_gpu(np.array(nzrows, np.int32))
        self.nzcols_gpu = gpuarray.to_gpu(np.array(nzcols, np.int32))
        # Helper array for normalization:
        self.norm_ones_gpu = gpuarray.to_gpu(np.array([1.0] * self.weights_gpu.shape[1], np.float32))
        # Kernel that applies the normalisation:
        self.normalize_kernel = ElementwiseKernel(
                        "int *nzrows, float *norm_total, float *weights",
                        "weights[i] *= norm_total[nzrows[i]]",
                        "divisive_normalize")
        # Kernel that calculates the learning:
        self.hebbian_kernel = ElementwiseKernel(
                        "float single_conn_lr, int *row, int *col, float *src_activity, float *dest_activity, float *result",
                        "result[i] += single_conn_lr * src_activity[col[i]] * dest_activity[row[i]]",
                        "hebbian_learning")

        params['apply_output_fns_init'] = should_apply
        self.apply_output_fns_init = should_apply
        if self.apply_output_fns_init:
            self.apply_learn_output_fns()
开发者ID:Tasignotas,项目名称:topographica_mirror,代码行数:51,代码来源:projection.py

示例2: _gpuAlloc

    def _gpuAlloc(self):
        #Get GPU information
        self.freeMem = cuda.mem_get_info()[0] * .5 * .8 # limit memory use to 80% of available
        self.maxPossRows = np.int(np.floor(self.freeMem / (4 * self.totalCols)))    # multiply by 4 as that is size of float
        # set max rows to smaller number to save memory usage
        if self.totalRows < self.maxPossRows:
            print "reducing max rows to reduce memory use on GPU"
            self.maxPossRows = self.totalRows

        # create pagelocked buffers and GPU arrays
        self.to_gpu_buffer = cuda.pagelocked_empty((self.maxPossRows , self.totalCols), np.float32)
        self.from_gpu_buffer = cuda.pagelocked_empty((self.maxPossRows , self.totalCols), np.float32)
        self.data_gpu = cuda.mem_alloc(self.to_gpu_buffer.nbytes)
        self.result_gpu = cuda.mem_alloc(self.from_gpu_buffer.nbytes)
开发者ID:aFuerst,项目名称:PyCUDA-Raster,代码行数:14,代码来源:gpuCalc.py

示例3: __init__

    def __init__(self,**params):
        #Hack-ish way to avoid initialisation until the weights are transfered:
        should_apply = self.apply_output_fns_init
        params['apply_output_fns_init'] = False
        super(GPUSparseCFProjection,self).__init__(**params)
        # The sparse matrix is stored in COO format, used for Hebbian learning and normalisation:
        nzcols, nzrows, values = self.weights.getTriplets()
        tups = sorted(zip(nzrows, nzcols, values))
        nzrows = np.array([x[0] for x in tups], np.int32)
        nzcols = np.array([x[1] for x in tups], np.int32)
        values = np.array([x[2] for x in tups], np.float32)
        # Getting them on the GPU:
        self.nzcount = self.weights.getnnz()
        self.nzrows_gpu = gpuarray.to_gpu(nzrows)
        self.nzcols_gpu = gpuarray.to_gpu(nzcols)
        # Setting the projection weights in CSR format for dot product calculation:
        rowPtr = cusparse.coo2csr(self.nzrows_gpu, self.weights.shape[1])
        descrA = cusparse.cusparseCreateMatDescr()
        cusparse.cusparseSetMatType(descrA, cusparse.CUSPARSE_MATRIX_TYPE_GENERAL)
        cusparse.cusparseSetMatIndexBase(descrA, cusparse.CUSPARSE_INDEX_BASE_ZERO)

        self.weights_gpu = cusparse.CSR(descrA, values, rowPtr, self.nzcols_gpu, (self.weights.shape[1], self.weights.shape[0]))
        # Allocating a page-locked piece of memory for the activity so that GPU could transfer data to the
        # main memory without the involvment of the CPU:
        self.activity = cuda.pagelocked_empty(self.activity.shape, np.float32)
        self.activity_gpu_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)

        self.input_buffer_pagelocked = cuda.pagelocked_empty(shape=(self.weights_gpu.shape[1],), dtype=np.float32, mem_flags=cuda.host_alloc_flags.WRITECOMBINED)
        self.input_buffer = gpuarray.zeros(shape=(self.weights_gpu.shape[1], ), dtype=np.float32)

        self.norm_total_gpu = gpuarray.zeros(shape=(self.weights_gpu.shape[0],), dtype=np.float32)
        # Helper array for normalization:
        self.norm_ones_gpu = gpuarray.to_gpu(np.array([1.0] * self.weights_gpu.shape[1], np.float32))
        # Kernel that applies the normalisation:
        self.normalize_kernel = ElementwiseKernel(
                        "int *nzrows, float *norm_total, float *weights",
                        "weights[i] *= norm_total[nzrows[i]]",
                        "divisive_normalize")
        # Kernel that calculates the learning:
        self.hebbian_kernel = ElementwiseKernel(
                        "float single_conn_lr, int *row, int *col, float *src_activity, float *dest_activity, float *result",
                        "result[i] += single_conn_lr * src_activity[col[i]] * dest_activity[row[i]]",
                        "hebbian_learning")
        self.pycuda_stream = cuda.Stream()
        # Finishing the initialisation that might have been delayed:
        params['apply_output_fns_init'] = should_apply
        self.apply_output_fns_init = should_apply
        if self.apply_output_fns_init:
            self.apply_learn_output_fns()
开发者ID:wenqi2015,项目名称:topographica,代码行数:49,代码来源:projection.py

示例4: to_cpu

    def to_cpu(self):
        if self.flags.forc:
            return self.get(pagelocked=True)

        result = cuda.pagelocked_empty(self.shape, self.dtype)
        copy_non_contiguous(result, self)
        return result
开发者ID:Brieden,项目名称:pyDive,代码行数:7,代码来源:gpu_ndarray.py

示例5: get_next_minibatch

  def get_next_minibatch(self, i, train=TRAIN):
    if train == TRAIN:
      data = self.train_data
    else:
      data = self.test_data

    batch_data = data.data
    batch_label = data.labels
    batch_size = self.batch_size

    mini_data = batch_data[:, i * batch_size: (i + 1) * batch_size]
    locked_data = driver.pagelocked_empty(mini_data.shape, mini_data.dtype, order='C',
                                          mem_flags=driver.host_alloc_flags.PORTABLE)
    locked_data[:] = mini_data

    if self.input is not None and locked_data.shape == self.input.shape:
      self.input.set(locked_data)
    else:
      self.input = gpuarray.to_gpu(locked_data)
    
    label = batch_label[i * batch_size : (i + 1) * batch_size]
    #label = gpuarray.to_gpu(label)

    #label = gpuarray.to_gpu(np.require(batch_label[i * batch_size : (i + 1) * batch_size],  dtype =
    #  np.float, requirements = 'C'))

    return self.input, label
开发者ID:phecy,项目名称:striate,代码行数:27,代码来源:trainer.py

示例6: _padded_array

 def _padded_array(self,ar): #{{{
     nrows_pad = ar.shape[0]
     ncols_pad = 16*((ar.shape[1]+15)/16)
     #arpad = numpy.empty((nrows_pad,ncols_pad),dtype=ar.dtype)
     arpad = cuda.pagelocked_empty((nrows_pad,ncols_pad),dtype=ar.dtype)
     arpad[0:ar.shape[0],0:ar.shape[1]] = ar
     return arpad
开发者ID:ihaque,项目名称:SIML,代码行数:7,代码来源:GPULingo.py

示例7: __init__

    def __init__(self, backend, ioshape, initval, extent, aliases, tags):
        # Call the standard matrix constructor
        super().__init__(backend, ioshape, initval, extent, aliases, tags)

        # Allocate a page-locked buffer on the host for MPI to send/recv from
        self.hdata = cuda.pagelocked_empty((self.nrow, self.ncol),
                                           self.dtype, 'C')
开发者ID:GwenaelGabard,项目名称:PyFR,代码行数:7,代码来源:types.py

示例8: get_async

    def get_async(self, stream=None, ary=None):
        if ary is None:
            ary = drv.pagelocked_empty(self.shape, self.dtype)
        else:
            assert ary.size == self.size
            assert ary.dtype == self.dtype

        if self.size:
            drv.memcpy_dtoh_async(ary, self.gpudata, stream)
        return ary
开发者ID:minrk,项目名称:PyCUDA,代码行数:10,代码来源:gpuarray.py

示例9: __init__

    def __init__(self, backend, ioshape, initval, extent, aliases, tags):
        # Call the standard matrix constructor
        super().__init__(backend, ioshape, initval, extent, aliases, tags)

        # If MPI is CUDA-aware then construct a buffer out of our CUDA
        # device allocation and pass this directly to MPI
        if backend.mpitype == 'cuda-aware':
            self.hdata = _make_pybuf(self.data, self.nbytes, 0x200)
        # Otherwise, allocate a buffer on the host for MPI to send/recv from
        else:
            self.hdata = cuda.pagelocked_empty((self.nrow, self.ncol),
                                               self.dtype, 'C')
开发者ID:pv101,项目名称:PyFR,代码行数:12,代码来源:types.py

示例10: __gpu_decorate_nodes

  def __gpu_decorate_nodes(self, samples, labels):
    si_0 = driver.pagelocked_empty(self.n_samples, dtype = self.dtype_indices)
    si_1 = driver.pagelocked_empty(self.n_samples, dtype = self.dtype_indices)
    self.values_array = np.empty(self.n_nodes, dtype = self.dtype_labels)
    cuda.memcpy_dtoh(si_0, self.sorted_indices_gpu.ptr)
    cuda.memcpy_dtoh(si_1, self.sorted_indices_gpu_.ptr)
    
    decorate(self.target, 
              si_0, 
              si_1, 
              self.values_idx_array, 
              self.values_si_idx_array, 
              self.values_array, 
              self.n_nodes)

    self.values_idx_array = None
    self.values_si_idx_array = None
    self.left_children.resize(self.n_nodes, refcheck = False)
    self.right_children.resize(self.n_nodes, refcheck = False) 
    self.feature_threshold_array.resize(self.n_nodes, refcheck = False) 
    self.feature_idx_array.resize(self.n_nodes, refcheck = False)
开发者ID:phecy,项目名称:CudaTree,代码行数:21,代码来源:random_tree.py

示例11: get

 def get(self, ary=None, pagelocked=False):
     if ary is None:
         if pagelocked:
             ary = drv.pagelocked_empty(self.shape, self.dtype)
         else:
             ary = numpy.empty(self.shape, self.dtype)
     else:
         assert ary.size == self.size
         assert ary.dtype == self.dtype
     if self.size:
         drv.memcpy_dtoh(ary, self.gpudata)
     return ary
开发者ID:minrk,项目名称:PyCUDA,代码行数:12,代码来源:gpuarray.py

示例12: threshold_integrated

def threshold_integrated(series, value):
    global _dn, _n, _bn, _loc_tmp, _loc_out, _val_out, _loc, _val
        
    t = numpy.float32(value**2)
    nb = int(numpy.ceil(float(len(series))/nt/gs))
    
    if _bn is None or len(_bn) < nb:
        _bn = gpuarray.zeros(nb, dtype=numpy.uint32)
        
    if _n is None:
        _n = driver.pagelocked_empty((1), numpy.uint32, mem_flags=drv.host_alloc_flags.DEVICEMAP)
        ptr = numpy.intp(_n.base.get_device_pointer())
        class T():
            pass
        _dn = T()
        _dn.gpudata = ptr
        _dn.flags = _n.flags
        
    if _loc_tmp is None or len(series) > len(_loc_tmp):
        _loc_tmp = gpuarray.zeros(len(series), dtype=numpy.uint32)
        _loc_out = gpuarray.zeros(len(series), dtype=numpy.uint32)
        _val_out = gpuarray.zeros(len(series), dtype=series.dtype)
        _val = driver.pagelocked_empty((4096*256), numpy.complex64)
        _loc = driver.pagelocked_empty((4096*256), numpy.uint32)
    
    #Do the thresholding by block
    stuff(series.data, _loc_tmp, _bn, t, numpy.uint32(len(series)), block=(nt, 1, 1), grid=(nb, 1))
    
    # Recombine the blocks into a final output
    stuff2(series.data, _loc_tmp, _loc_out, _val_out, _bn, _dn, block=(nb, 1, 1), grid=(nb, 1))
    
    # We need to get the data back now
    pycbc.scheme.mgr.state.context.synchronize()
    if _n != 0: 
        driver.memcpy_dtoh_async(_val[0:_n], _val_out.gpudata)
        driver.memcpy_dtoh_async(_loc[0:_n], _loc_out.gpudata)
        pycbc.scheme.mgr.state.context.synchronize()
    return _loc[0:_n], _val[0:_n]
开发者ID:AbhayMK,项目名称:pycbc,代码行数:38,代码来源:threshold_cuda.py

示例13: get_async

    def get_async(self, stream=None, ary=None):
        if ary is None:
            ary = drv.pagelocked_empty(self.shape, self.dtype)

            ary = _as_strided(ary, strides=self.strides)
        else:
            assert ary.size == self.size
            assert ary.dtype == self.dtype
            assert ary.flags.forc

        assert self.flags.forc, "Array in get() must be contiguous"

        if self.size:
            drv.memcpy_dtoh_async(ary, self.gpudata, stream)
        return ary
开发者ID:hannes-brt,项目名称:pycuda,代码行数:15,代码来源:gpuarray.py

示例14: get_async

 def get_async(self, stream = None, ary = None):
     if ary is None:
         ary = cuda.pagelocked_empty(self.shape, self.dtype)
         
     else:
         assert ary.size == self.size
         assert ary.dtype == ary.dtype
         if ary.base.__class__ != cuda.HostAllocation:
             raise TypeError("asynchronous memory trasfer requires pagelocked numpy array")
     
     if self.size:
         if self.M == 1:
             cuda.memcpy_dtoh_async(ary, self.gpudata, stream)
         else:
             PitchTrans(self.shape, ary, _pd(self.shape), self.gpudata, self.ld, self.dtype, async = True, stream = stream)
             
     return ary
开发者ID:bionet,项目名称:vtem,代码行数:17,代码来源:parray.py

示例15: get

    def get(self, ary=None, astype=None, pagelocked=False):
        if ary is None:
            if pagelocked:
                ary = drv.pagelocked_empty(self.shape, self.dtype)
            else:
                ary = np.empty(self.shape, self.dtype)

            ary = _as_strided(ary, strides=self.strides)
        else:
            assert ary.size == self.size
            assert ary.dtype == self.dtype
            assert ary.flags.forc

        assert self.flags.forc, "Array in get() must be contiguous"

        if self.size:
            drv.memcpy_dtoh(ary, self.gpudata)

        if astype is not None:
            ary = ary.astype(astype) * 2 ** (self.iwl - 15)

        return ary
开发者ID:benanne,项目名称:nervana-lib-gpu-performance-preview,代码行数:22,代码来源:flexpt_array.py


注:本文中的pycuda.driver.pagelocked_empty函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。