

Python driver.memcpy_dtod Function Code Examples

This article collects typical usage examples of the pycuda.driver.memcpy_dtod function in Python. If you have been wondering what exactly memcpy_dtod does, how to call it, or what it looks like in real code, the curated examples below should help.


A total of 15 code examples of the memcpy_dtod function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
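
Before the collected examples, here is a minimal, self-contained sketch (not taken from any of the projects cited below) of the pattern they all share: cuda.memcpy_dtod(dst_ptr, src_ptr, nbytes) copies a raw byte range from one device allocation to another. The array names and sizes here are illustrative only.

import numpy as np
import pycuda.autoinit          # creates a CUDA context on the default device
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray

# illustrative source array on the device
src = gpuarray.to_gpu(np.arange(16, dtype=np.float32))

# destination array with matching dtype and size
dst = gpuarray.empty_like(src)

# device-to-device copy: destination pointer, source pointer, byte count
cuda.memcpy_dtod(dst.gpudata, src.gpudata, src.nbytes)

print(dst.get())  # prints the same contents as src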

Example 1: _read_external_input

 def _read_external_input(self):
     if not self.input_eof or self.frame_count<self.frames_in_buffer:
         cuda.memcpy_dtod(int(int(self.synapse_state.gpudata) + \
                          self.total_synapses*self.synapse_state.dtype.itemsize), \
                          int(int(self.I_ext.gpudata) + self.frame_count*self.I_ext.ld*self.I_ext.dtype.itemsize), \
                          self.num_input * self.synapse_state.dtype.itemsize)
         self.frame_count += 1
     else:
         self.logger.info('Input end of file reached. Subsequent behaviour is undefined.')
     if self.frame_count >= self._one_time_import and not self.input_eof:
         input_ld = self.input_h5file.root.array.shape[0]
         if input_ld - self.file_pointer < self._one_time_import:
             h_ext = self.input_h5file.root.array.read(self.file_pointer, input_ld)
         else:
             h_ext = self.input_h5file.root.array.read(self.file_pointer, self.file_pointer + self._one_time_import)
         if h_ext.shape[0] == self.I_ext.shape[0]:
             self.I_ext.set(h_ext)
             self.file_pointer += self._one_time_import
             self.frame_count = 0
         else:
             pad_shape = list(h_ext.shape)
             self.frames_in_buffer = h_ext.shape[0]
             pad_shape[0] = self._one_time_import - h_ext.shape[0]
             h_ext = np.concatenate((h_ext, np.zeros(pad_shape)), axis=0)
             self.I_ext.set(h_ext)
             self.file_pointer = input_ld
             
         if self.file_pointer == self.input_h5file.root.array.shape[0]:
             self.input_eof = True
Developer: LuisMoralesAlonso, Project: neurokernel, Lines: 29, Source: LPU.py

Example 2: _gpuarray_copy

def _gpuarray_copy(array):
    if not array.flags.forc:
        raise RuntimeError('only contiguous arrays may copied.')

    new = GPUArray(array.shape, array.dtype, allocator=array.allocator)
    drv.memcpy_dtod(new.gpudata, array.gpudata, array.nbytes)
    return new
Developer: ALEXGUOQ, Project: chainer, Lines: 7, Source: cuda.py

Example 3: copy

    def copy(self):
        if not self.flags.forc:
            raise RuntimeError("only contiguous arrays may copied.")

        new = GPUArray(self.shape, self.dtype)
        drv.memcpy_dtod(new.gpudata,self.gpudata,self.nbytes)
        return new
Developer: hannes-brt, Project: pycuda, Lines: 7, Source: gpuarray.py

Example 4: _read_external_input

    def _read_external_input(self):
        # if eof not reached or there are frames in buffer not read
        # copy the input from buffer to synapse state array
        if not self.input_eof or self.frame_count < self.frames_in_buffer:
            cuda.memcpy_dtod(
                int(int(self.synapse_state.gpudata) + self.total_synapses * self.synapse_state.dtype.itemsize),
                int(int(self.I_ext.gpudata) + self.frame_count * self.I_ext.ld * self.I_ext.dtype.itemsize),
                self.num_input * self.synapse_state.dtype.itemsize,
            )
            self.frame_count += 1
        else:
            self.log_info("Input end of file reached. " "Subsequent behaviour is undefined.")
        # if all buffer frames were read, read from file
        if self.frame_count >= self._one_time_import and not self.input_eof:
            input_ld = self.input_h5file.root.array.shape[0]
            if input_ld - self.file_pointer < self._one_time_import:
                h_ext = self.input_h5file.root.array.read(self.file_pointer, input_ld)
            else:
                h_ext = self.input_h5file.root.array.read(self.file_pointer, self.file_pointer + self._one_time_import)
            if h_ext.shape[0] == self.I_ext.shape[0]:
                self.I_ext.set(h_ext)
                self.file_pointer += self._one_time_import
                self.frame_count = 0
            else:
                pad_shape = list(h_ext.shape)
                self.frames_in_buffer = h_ext.shape[0]
                pad_shape[0] = self._one_time_import - h_ext.shape[0]
                h_ext = np.concatenate((h_ext, np.zeros(pad_shape)), axis=0)
                self.I_ext.set(h_ext)
                self.file_pointer = input_ld

            if self.file_pointer == self.input_h5file.root.array.shape[0]:
                self.input_eof = True
Developer: yiyin, Project: neurokernel, Lines: 33, Source: LPU.py

Example 5: swapHashTableValues

 def swapHashTableValues(new_vals):
     table_vals, table_vals_size = mod.get_global('table_values') # (device_ptr, size_in_bytes)
     old_vals_gpu = cuda.mem_alloc(table_vals_size)
     # old_vals_gpu = gpuarray.empty((table_vals_size,1), )
     cuda.memcpy_dtod(old_vals_gpu, table_vals, table_vals_size)
     cuda.memcpy_dtod(table_vals, new_vals.gpudata, table_vals_size)
     return old_vals_gpu
Developer: AdrianLsk, Project: permutohedral_pycuda, Lines: 7, Source: filter_pycuda.py

Example 6: cache_z

 def cache_z(self, z):
     x = np.require(z.real, dtype = np.double, requirements = ['A','W','O','C'])
     y = np.require(z.imag, dtype = np.double, requirements = ['A','W','O','C'])
     xd = gpuarray.to_gpu(x)
     yd = gpuarray.to_gpu(y)
     cuda.memcpy_dtod(self.xd, xd.ptr, xd.nbytes)
     cuda.memcpy_dtod(self.yd, yd.ptr, yd.nbytes)
Developer: abelfunctions, Project: abelfunctions, Lines: 7, Source: riemanntheta_omegas.py

Example 7: matvec

 def matvec(self, v):
     x = v.reshape((self.D, self.D))
     
     self.xG.set(x)
     #self.out2.set(self.xG)
     #self.out2[:] = self.xG
     cd.memcpy_dtod(self.out2.gpudata, self.xG.gpudata, self.xG.nbytes)
     
     out = [self.out, self.out_p]
     out2 = [self.out2, self.out2_p]
     
     if self.left: #Multiplying from the left, but x is a col. vector, so use mat_dagger
         for k in range(len(self.A1G)):
             if self.use_batch:
                 eps_l_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0], 
                                  self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
             else:
                 eps_l_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
                                     self.tmp, self.tmp2, self.ones, self.zeros,
                                     self.streams, self.hdl)
             out, out2 = out2, out
         Ehx = out2[0]
         
         if self.pseudo:
             QEQhx = Ehx - self.lG * m.adot(self.r, x)
             #res = QEQhx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
             cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p), 
                            QEQhx.gpudata, 1, self.xG.gpudata, 1)
             res = self.xG
         else:
             #res = Ehx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
             cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p), 
                            Ehx.gpudata, 1, self.xG.gpudata, 1)
             res = self.xG
     else:
         for k in range(len(self.A2G) - 1, -1, -1):
             if self.use_batch:
                 eps_r_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0], 
                                  self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
             else:
                 eps_r_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
                                     self.tmp, self.tmp2, self.ones, self.zeros,
                                     self.streams, self.hdl)
             out, out2 = out2, out
         Ex = out2[0]
         
         if self.pseudo:
             QEQx = Ex - self.rG * m.adot(self.l, x)
             #res = QEQx.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
             cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p), 
                            QEQx.gpudata, 1, self.xG.gpudata, 1)
             res = self.xG
         else:
             #res = Ex.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
             cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p), 
                            Ex.gpudata, 1, self.xG.gpudata, 1)
             res = self.xG
     
     return res.get().ravel()
Developer: amilsted, Project: evoMPS, Lines: 59, Source: cuda_alternatives.py

Example 8: set_data

def set_data(filenames, file_count,subb, config, count, cur, img_mean, gpu_data, gpu_data_remote, ctx, icomm,img_batch_empty):

    load_time = time.time()
    data=None
    
#    aa = config['rank']+count/subb*size
#    img_list = range(aa*config['file_batch_size'],(aa+1)*config['file_batch_size'],1) 
    #print rank, img_list
    if config['data_source'] in ['hkl','both']:
        data_hkl = hkl.load(str(filenames[file_count]))# c01b
        data = data_hkl
        
    if config['data_source'] in ['lmdb', 'both']:       
        data_lmdb = lmdb_load_cur(cur,config,img_batch_empty) 
        data = data_lmdb
                        
    if config['data_source']=='both': 
        if config['rank']==0: print (rank,(data_hkl-data_lmdb)[1,0:3,1,1].tolist())
        
    load_time = time.time()-load_time #)*

    sub_time = time.time() #(
    data = data -img_mean
    sub_time = time.time()-sub_time

    crop_time = time.time() #(

    for minibatch_index in range(subb):
        count+=1
        
        batch_data = data[:,:,:,minibatch_index*config['batch_size']:(minibatch_index+1)*batch_size]
        if mode == 'train':
            rand_arr = get_rand3d(config['random'], count+(rank+1)*n_files*(subb))
        else:
            rand_arr = np.float32([0.5, 0.5, 0]) 
        batch_data = crop_and_mirror(batch_data, rand_arr, flag_batch=config['batch_crop_mirror'],cropsize=config['input_width'])
        gpu_data[minibatch_index].set(batch_data)   

    crop_time = time.time() - crop_time #)
	
    #print 'load_time:  %f (load %f, sub %f, crop %f)' % (load_time+crop_time+sub_time, load_time,sub_time, crop_time)
    
    # wait for computation on last file to finish
    msg = icomm.recv(source=MPI.ANY_SOURCE,tag=35)
    assert msg == "calc_finished"
    
    for minibatch_index in range(subb):
        # copy from preload area
        drv.memcpy_dtod(gpu_data_remote[minibatch_index].ptr,
                        gpu_data[minibatch_index].ptr,
                        gpu_data[minibatch_index].dtype.itemsize *
                        gpu_data[minibatch_index].size
                        )

    ctx.synchronize()

    icomm.isend("copy_finished",dest=0,tag=55)
    
    return count
Developer: hma02, Project: platoon, Lines: 59, Source: proc_load_mpi.py

Example 9: copy

 def copy(self):
     """
     returns a duplicated copy of self
     """
     result = self._new_like_me()
     if self.size:
         cuda.memcpy_dtod(result.gpudata, self.gpudata, self.mem_size * self.dtype.itemsize)
     
     return result
Developer: bionet, Project: vtem, Lines: 9, Source: parray.py

Example 10: _loadInput

    def _loadInput(self, stim):
        logging.debug('loadInput')

        # shortcuts
        nrXY = self.nrX * self.nrY
        nrXYD = self.nrX * self.nrY * self.nrDirs

        # parse input
        assert type(stim).__module__ == "numpy", "stim must be numpy array"
        assert type(stim).__name__ == "ndarray", "stim must be numpy.ndarray"
        assert stim.size > 0, "stim cannot be []"
        stim = stim.astype(np.ubyte)

        rows, cols = stim.shape
        logging.debug("- stim shape={0}x{1}".format(rows, cols))

        # shift d_stimBuf in time by 1 frame, from frame i to frame i-1
        # write our own memcpy kernel... :-(
        gdim = (int(iDivUp(nrXY, 128)), 1)
        bdim = (128, 1, 1)
        for i in xrange(1, self.nrT):
            stimBufPt_dst = np.intp(self.d_stimBuf) + self.szXY * (i - 1)
            stimBufPt_src = np.intp(self.d_stimBuf) + self.szXY * i
            self.dev_memcpy_dtod(
                stimBufPt_dst,
                stimBufPt_src,
                np.int32(nrXY),
                block=bdim, grid=gdim)

        # index into d_stimBuf array to place the new stim at the end
        # (newest frame at pos: nrT-1)
        d_stimBufPt = np.intp(self.d_stimBuf) + self.szXY * (self.nrT-1)

        # \TODO implement RGB support
        self.dev_split_gray(
            d_stimBufPt,
            cuda.In(stim),
            np.int32(stim.size),
            block=bdim, grid=gdim)

        # create working copy of d_stimBuf
        cuda.memcpy_dtod(self.d_scalingStimBuf, self.d_stimBuf,
                         self.szXY*self.nrT)

        # reset V1complex responses to 0
        # \FIXME not sure how to use memset...doesn't seem to give expected
        # result
        tmp = np.zeros(nrXYD).astype(np.float32)
        cuda.memcpy_htod(self.d_respV1c, tmp)

        # allocate d_resp, which will contain the response to all 28
        # (nrFilters) space-time orientations at 3 (nrScales) scales for
        # every pixel location (nrX*nrY)
        tmp = np.zeros(nrXY*self.nrFilters*self.nrScales).astype(np.float32)
        cuda.memcpy_htod(self.d_resp, tmp)
Developer: UCI-CARL, Project: MotionEnergy, Lines: 55, Source: motionenergy.py

Example 11: _update_buffer

 def _update_buffer(self):
     if self.my_num_gpot_neurons>0:
         cuda.memcpy_dtod(int(self.buffer.gpot_buffer.gpudata) + \
             self.buffer.gpot_current*self.buffer.gpot_buffer.ld* \
             self.buffer.gpot_buffer.dtype.itemsize, self.V.gpudata, \
             self.V.nbytes)
     if self.my_num_spike_neurons>0:
         cuda.memcpy_dtod(int(self.buffer.spike_buffer.gpudata) + \
             self.buffer.spike_current*self.buffer.spike_buffer.ld* \
             self.buffer.spike_buffer.dtype.itemsize, self.spike_state.gpudata,\
             int(self.spike_state.dtype.itemsize*self.my_num_spike_neurons))
Developer: prabindh, Project: neurokernel, Lines: 11, Source: LPU.py

Example 12: arrayp2g

def arrayp2g(pary):
    """convert a PitchArray to a GPUArray"""
    from pycuda.gpuarray import GPUArray
    result = GPUArray(pary.shape, pary.dtype)
    if pary.size:
        if pary.M == 1:
            cuda.memcpy_dtod(result.gpudata, pary.gpudata, pary.mem_size * pary.dtype.itemsize)
        else:
            PitchTrans(pary.shape, result.gpudata, _pd(result.shape), pary.gpudata, pary.ld, pary.dtype)
            
    return result
Developer: bionet, Project: vtem, Lines: 11, Source: parray.py

Example 13: _set_state

 def _set_state(self, k, v):
     cls = type(self)
     if k in self.params_dict:
         cuda.memcpy_dtod(self.states[k].gpudata,
                          self.params_dict[k].gpudata,
                          self.params_dict[k].nbytes)
     else:
         if isinstance(v, float):
             self.states[k].fill(self.floattype(v))
         else:
             assert(v in cls.states)
             self.states[k].fill(self.floattype(cls.states[v]))
Developer: chungheng, Project: neurodriver, Lines: 12, Source: NDComponent.py

Example 14: update

    def update(self):
        nn, ne, nne = np.int32([self.nn, self.ne, self.nne])
        dt, de, vf = np.float64([self.dt, self.de, self.vf])
        bs, gs = (256,1,1), (self.nn//256+1,1)
        ul, ul_prev, ul_tmp = self.ul_gpu, self.ul_prev_gpu, self.ul_tmp_gpu
        kl = self.kl_gpu
        el_sum = self.el_sum_gpu
        c_ul_tmps = np.float32([0, 0.5, 0.5, 1])
        c_uls = np.float32([1./6, 1./3, 1./3, 1./6])

        cuda.memcpy_dtod(ul_prev, ul, self.ul.nbytes)
        for c_ul_tmp, c_ul in zip(c_ul_tmps, c_uls):
            self.update_pre(nn, nne, vf, c_ul_tmp, ul, ul_prev, ul_tmp, kl, el_sum, block=bs, grid=gs)
            self.update_ul(nn, ne, nne, dt, de, vf, c_ul, ul, ul_tmp, kl, el_sum, block=bs, grid=gs)
Developer: wbkifun, Project: my_research, Lines: 14, Source: dg_modal_gpu.py

Example 15: stepFunction

def stepFunction():
  global animIter
  cuda.memcpy_dtod( plotDataFloat_d.ptr, concentrationOut_d.ptr, concentrationOut_d.nbytes )
  maxVal = (gpuarray.max(plotDataFloat_d)).get()
  multiplyByScalarReal( cudaPre(0.5/(maxVal)), plotDataFloat_d )
  floatToUchar( plotDataFloat_d, plotDataChars_d)
  copyToScreenArray()
  if cudaP == "float": [ oneIteration_tex() for i in range(nIterationsPerPlot) ]
  #else: [ oneIteration_sh() for i in range(nIterationsPerPlot//2) ]
  if plotting and animIter%25 == 0: 
    maxVals.append( maxVal )
    sumConc.append( gpuarray.sum(concentrationIn_d).get() )
    plotData( maxVals, sumConc )
  animIter += 1
Developer: bvillasen, Project: percolation, Lines: 14, Source: percolation3D.py


Note: The pycuda.driver.memcpy_dtod function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and the copyright of the source code belongs to the original authors. Please consult the corresponding project's license before redistributing or reusing the code; do not reproduce this article without permission.