当前位置: 首页>>代码示例>>Python>>正文


Python driver.memcpy_htod函数代码示例

本文整理汇总了 Python 中 pycuda.driver.memcpy_htod 函数的典型用法代码示例。如果您正苦于以下问题:Python memcpy_htod 函数的具体用法?Python memcpy_htod 怎么用?Python memcpy_htod 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了 memcpy_htod 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: __init__

    def __init__(self, view_tile, size, sigma, debug=False):
        """Prepare the host/device grid and compile the Gaussian kernel.

        :param view_tile: object providing ``get_Data()`` and ``get_View()``.
        :param size: two-element grid shape; ``size[0]`` drives ``dy`` and
            ``size[1]`` drives ``dx``. Both entries must be at least 2.
        :param sigma: stored unchanged as ``self.sigma``.
        :param debug: stored unchanged as ``self.debug``.
        :raises ValueError: if either entry of ``size`` is below 2.
        :raises Exception: if a data set is still not C-contiguous after
            ``np.require``.
        """
        self.debug = debug
        n_rows, n_cols = size[0], size[1]
        if n_rows < 2 or n_cols < 2:
            raise ValueError("Split needs to be at least 2x2")

        # Every data set must be C-contiguous before it is handed to CUDA.
        self.data_sets = view_tile.get_Data()
        for dset in self.data_sets:
            points = dset.getDataSet()
            if points.flags['C_CONTIGUOUS']:
                continue
            print("NOT CONTIGUOUS, trying to reformat the points")
            points = np.require(points, dtype=points.dtype, requirements=['C'])
            if not points.flags['C_CONTIGUOUS']:
                raise Exception("Points are not contiguous")
            dset.setDataSet(points)

        self.view_tile = view_tile
        self.sigma = sigma
        self.pts_gpu = None

        # Zero-filled float32 grid on the host, mirrored on the device.
        self.grid = np.zeros(size, dtype=np.float32)
        self.grid_gpu = cuda.mem_alloc_like(self.grid)
        cuda.memcpy_htod(self.grid_gpu, self.grid)

        module = SourceModule(self.__cuda_code)
        self.gpu_gaussian = module.get_function("gpu_gaussian")

        self.view = self.view_tile.get_View()

        sizes = self.__setup_cuda_sizes(size)
        self.grid_size, self.block_size = sizes

        # Spacing between neighbouring grid nodes on a unit-length axis.
        self.dx = 1 / float(n_cols - 1)
        self.dy = 1 / float(n_rows - 1)
开发者ID:SCIInstitute,项目名称:MLM,代码行数:33,代码来源:gaussian_gpu_grid.py

示例2: test_constant_memory

    def test_constant_memory(self):
        """Check that data written to ``__constant__`` memory reaches a kernel.

        A 32-element float32 array is copied into the module-level
        ``const_array`` symbol, a kernel copies it into global memory, and
        the result is read back and compared with the host array.
        """
        # contributed by Andrew Wagner

        module = SourceModule("""
        __constant__ float const_array[32];

        __global__ void copy_constant_into_global(float* global_result_array)
        {
            global_result_array[threadIdx.x] = const_array[threadIdx.x];
        }
        """)

        copy_constant_into_global = module.get_function("copy_constant_into_global")
        # get_global returns a pair; only the first element (device pointer)
        # is needed here.
        const_array, _ = module.get_global('const_array')

        host_array = np.random.randint(0,255,(32,)).astype(np.float32)

        global_result_array = drv.mem_alloc_like(host_array)
        drv.memcpy_htod(const_array, host_array)

        copy_constant_into_global(
                global_result_array,
                grid=(1, 1), block=(32, 1, 1))

        host_result_array = np.zeros_like(host_array)
        drv.memcpy_dtoh(host_result_array, global_result_array)

        # ``.all`` must be *called*: the bare bound method is always truthy,
        # which made the original assertion impossible to fail.
        assert (host_result_array == host_array).all()
开发者ID:davidweichiang,项目名称:pycuda,代码行数:28,代码来源:test_driver.py

示例3: edgetaper_gpu

def edgetaper_gpu(y_gpu, sf, win='barthann'):
  """Compile and launch the ``edgetaper`` CUDA kernel on ``y_gpu``.

  Two 1-D windows are built with ``scipy.signal.get_window``, scaled by the
  product of their maxima, truncated to their first half and copied into the
  module globals ``wx`` and ``wy`` before the kernel is launched.

  :param y_gpu: device array exposing ``shape`` and ``dtype``; passed as the
      first kernel argument (presumably modified in place - the kernel
      source is not visible here).
  :param sf: two-element window size; each entry is bumped to the next odd
      number. ``sf[1]`` sizes the x window and ``sf[0]`` the y window.
  :param win: window name forwarded to ``scipy.signal.get_window``.
  :returns: None.
  """
  shape = np.array(y_gpu.shape).astype(np.uint32)
  dtype = y_gpu.dtype
  block_size = (16,16,1)
  grid_size = (int(np.ceil(float(shape[1])/block_size[0])),
               int(np.ceil(float(shape[0])/block_size[1])))

  # Ensure that sf is odd
  sf = sf+(1-np.mod(sf,2))
  wx = scipy.signal.get_window(win, sf[1])
  wy = scipy.signal.get_window(win, sf[0])
  maxw = wx.max() * wy.max()

  # Half sizes must be integers: they are used as slice bounds below, and
  # NumPy rejects float indices. floor_divide gives the same values as the
  # previous floor(sf/2) under both Python 2 and Python 3 division rules.
  hsf = np.floor_divide(sf, 2).astype(int)
  wx = (wx[0:hsf[1]] / maxw).astype(dtype)
  wy = (wy[0:hsf[0]] / maxw).astype(dtype)

  preproc = _generate_preproc(dtype, shape)
  preproc += '#define wx_size %d\n' % wx.size
  preproc += '#define wy_size %d\n' % wy.size
  mod = SourceModule(preproc + edgetaper_code, keep=True)
  # Named differently from this function so the local does not shadow it.
  edgetaper_kernel = mod.get_function("edgetaper")
  wx_gpu, _ = mod.get_global('wx')
  wy_gpu, _ = mod.get_global('wy')

  cu.memcpy_htod(wx_gpu, wx)
  cu.memcpy_htod(wy_gpu, wy)

  edgetaper_kernel(y_gpu, np.int32(hsf[1]), np.int32(hsf[0]),
                   block=block_size, grid=grid_size)
开发者ID:matthiaslee,项目名称:VMBD,代码行数:31,代码来源:gputools.py

示例4: prepare_device_arrays

    def prepare_device_arrays(self):
        """Upload the layer tables and the fine bin centres to the device.

        The layer arrays are allocated on the host, filled in by
        ``self.grid_prop`` and copied into freshly allocated device buffers
        (``self.d_numLayers``, ``self.d_densityInLayer``,
        ``self.d_distanceInLayer``). ``self.ecen_fine`` and
        ``self.czcen_fine`` are copied to ``self.d_ecen_fine`` and
        ``self.d_czcen_fine``. Returns None.
        """
        self.maxLayers = self.grid_prop.GetMaxLayers()
        n_cz = len(self.czcen_fine)
        flat_len = n_cz * self.maxLayers

        # Host-side buffers; grid_prop fills them in place.
        layer_counts = np.zeros(n_cz, dtype=np.int32)
        layer_density = np.zeros(flat_len, dtype=self.FTYPE)
        layer_distance = np.zeros(flat_len, dtype=self.FTYPE)

        self.grid_prop.GetNumberOfLayers(layer_counts)
        self.grid_prop.GetDensityInLayer(layer_density)
        self.grid_prop.GetDistanceInLayer(layer_distance)

        # Allocate all three device buffers, then copy the tables across.
        self.d_numLayers = cuda.mem_alloc(layer_counts.nbytes)
        self.d_densityInLayer = cuda.mem_alloc(layer_density.nbytes)
        self.d_distanceInLayer = cuda.mem_alloc(layer_distance.nbytes)
        cuda.memcpy_htod(self.d_numLayers, layer_counts)
        cuda.memcpy_htod(self.d_densityInLayer, layer_density)
        cuda.memcpy_htod(self.d_distanceInLayer, layer_distance)

        # Same for the two fine-binning arrays already held on self.
        self.d_ecen_fine = cuda.mem_alloc(self.ecen_fine.nbytes)
        self.d_czcen_fine = cuda.mem_alloc(self.czcen_fine.nbytes)
        cuda.memcpy_htod(self.d_ecen_fine, self.ecen_fine)
        cuda.memcpy_htod(self.d_czcen_fine, self.czcen_fine)
开发者ID:gkrueckl,项目名称:pisa,代码行数:26,代码来源:Prob3GPUOscillationService.py

示例5: _set

    def _set(self, ary):
        """Pack ``ary`` into a zero-padded host buffer and copy it to the device.

        The buffer has shape ``(self.nrow, self.leaddim)``; only the first
        ``self.ncol`` columns receive data from ``self._pack(ary)``, the
        remaining columns stay zero.
        """
        padded = np.zeros((self.nrow, self.leaddim), dtype=self.dtype)
        packed = self._pack(ary)
        padded[:, :self.ncol] = packed

        # Host -> device transfer of the whole padded buffer.
        cuda.memcpy_htod(self.data, padded)
开发者ID:pv101,项目名称:PyFR,代码行数:7,代码来源:types.py

示例6: from_np

    def from_np(np_data):
        """Build a ``MyTensor`` backed by a device copy of ``np_data``.

        A device buffer of ``np_data.nbytes`` bytes is allocated and filled
        from the host array; the host array itself is kept on the returned
        tensor as ``cpudata``.
        """
        device_buf = cuda.mem_alloc(np_data.nbytes)
        cuda.memcpy_htod(device_buf, np_data)

        result = MyTensor(device_buf, shape=np_data.shape, size=np_data.size)
        result.cpudata = np_data
        return result
开发者ID:hughperkins,项目名称:neon,代码行数:7,代码来源:test_correctness.py

示例7: cuda_crossOver

def cuda_crossOver(sola, solb):
    """Run the ``crossOver`` CUDA kernel on two solutions.

    Both inputs are copied to the device, the kernel is launched with one
    block of ``len(sola)`` threads, and both device buffers are copied back
    into new host arrays.

    :param sola: first host array (must expose ``nbytes``).
    :param solb: second host array (must expose ``nbytes``).
    :returns: tuple ``(a_new, b_new)`` holding the buffers read back from the
        device, shaped like ``sola`` and ``solb`` respectively.
    """
    sol_len = len(sola)

    a_gpu = cuda.mem_alloc(sola.nbytes)
    b_gpu = cuda.mem_alloc(solb.nbytes)

    cuda.memcpy_htod(a_gpu, sola)
    cuda.memcpy_htod(b_gpu, solb)

    func = mod.get_function("crossOver")
    func(a_gpu, b_gpu, block=(sol_len, 1, 1))

    a_new = numpy.empty_like(sola)
    b_new = numpy.empty_like(solb)

    cuda.memcpy_dtoh(a_new, a_gpu)
    cuda.memcpy_dtoh(b_new, b_gpu)

    if debug:
        # The inputs are named sola/solb; the previous code printed the
        # undefined names ``a`` and ``b``. Single-argument print calls give
        # the same output under Python 2 and Python 3.
        print("a: %s" % (sola,))
        print("b: %s" % (solb,))
        print("new a: %s" % (a_new,))
        print("new b: %s" % (b_new,))

    return a_new, b_new
开发者ID:adamuas,项目名称:coevondm,代码行数:27,代码来源:cudaInterface.py

示例8: _to_device

 def _to_device(self, module):
     """Copy this constant into the module global named ``self.name``.

     Raises RuntimeError when the size reported for the global differs from
     ``self.data.nbytes``. The copy only happens while ``self.state`` is
     ``DeviceDataMixin.HOST``; afterwards the state becomes
     ``DeviceDataMixin.BOTH``.
     """
     ptr, size = module.get_global(self.name)
     if size != self.data.nbytes:
         message = "Const %s needs %d bytes, but only space for %d" % (
             self, self.data.nbytes, size)
         raise RuntimeError(message)

     if self.state is not DeviceDataMixin.HOST:
         return

     driver.memcpy_htod(ptr, self._data)
     self.state = DeviceDataMixin.BOTH
开发者ID:RomainBrault,项目名称:PyOP2,代码行数:7,代码来源:cuda.py

示例9: __init__

    def __init__(self, n_dict, V, dt, debug=False):
        """Upload the per-neuron parameters and build the update kernel.

        :param n_dict: mapping read for the keys ``id``, ``initn``,
            ``initV``, ``V1``..``V4``, ``V_l``, ``V_ca``, ``V_k``, ``G_l``,
            ``G_ca``, ``G_k``, ``phi`` and ``offset``.
        :param V: device address (anything ``int()`` accepts) that receives
            the ``initV`` values.
        :param dt: time step; split into ``self.steps`` sub-steps of length
            ``self.ddt``, each roughly 1e-5.
        :param debug: stored unchanged as ``self.debug``.
        """
        self.num_neurons = len(n_dict['id'])
        self.dt = np.double(dt)
        self.steps = max(int(round(dt / 1e-5)), 1)
        self.debug = debug

        self.ddt = dt / self.steps

        self.V = V

        # (attribute, n_dict key) pairs; each is uploaded as a float64
        # GPU array, in this order.
        gpu_fields = (
            ('n', 'initn'),
            ('V_1', 'V1'),
            ('V_2', 'V2'),
            ('V_3', 'V3'),
            ('V_4', 'V4'),
            ('V_l', 'V_l'),
            ('V_ca', 'V_ca'),
            ('V_k', 'V_k'),
            ('G_l', 'G_l'),
            ('G_ca', 'G_ca'),
            ('G_k', 'G_k'),
            ('Tphi', 'phi'),
            ('offset', 'offset'),
        )
        for attr_name, key in gpu_fields:
            host_values = np.asarray(n_dict[key], dtype=np.float64)
            setattr(self, attr_name, garray.to_gpu(host_values))

        # Seed the externally owned buffer with the initial values.
        v_address = int(self.V)
        initial_v = np.asarray(n_dict['initV'], dtype=np.double)
        cuda.memcpy_htod(v_address, initial_v)
        self.update = self.get_euler_kernel()
开发者ID:yiyin,项目名称:neurokernel,代码行数:30,代码来源:MorrisLecar_a.py

示例10: evaluate

  def evaluate(self, params, returnOutputs=False):
    """Run the evaluation kernel for a population of parameter sets.

    The parameters are copied to the device, the kernel is launched over
    the training set, and the outputs are read back into
    ``self.outputsMat`` with shape ``(popSize, trainSet.size)``.

    @param params: one Parameters structure per network
    @type params: list of Parameters
    @param returnOutputs: return the output matrix (debug aid)
    @type returnOutputs: bool, default False

    @return output matrix if returnOutputs=True, else None
    @raise ValueError: if len(params) differs from self.popSize
    """
    n_given = len(params)
    if self.popSize != n_given:
      raise ValueError("Need %d Parameter structures (provided %d)" % (
        self.popSize, n_given))

    # Pack the structures into one contiguous ctypes array for the copy.
    host_params = (Parameters * n_given)(*params)
    driver.memcpy_htod(self.params, host_params)

    # TODO: remove
    # Factor 4 is presumably sizeof(float32), matching the dtype read back
    # below - confirm.
    output_bytes = self.popSize * self.trainSet.size * 4
    driver.memset_d8(self.outputs, 0, output_bytes)

    self.evaluateKernel.prepared_call(
      self.evaluateGridDim,
      self.trainSetDev,
      self.trainSet.size,
      self.params,
      self.popSize,
      self.outputs)

    driver.Context.synchronize()

    out_shape = (self.popSize, self.trainSet.size)
    self.outputsMat = driver.from_device(
      self.outputs, shape=out_shape, dtype=np.float32)

    if not returnOutputs:
      return None
    return self.outputsMat
开发者ID:cpatulea,项目名称:evolution,代码行数:35,代码来源:ann.py

示例11: __init__

    def __init__(self, n_dict, V, dt, debug=False, cuda_verbose=False):
        """Upload the per-neuron parameters and build the update kernel.

        :param n_dict: mapping read for the keys ``id``, ``initn``,
            ``initV``, ``V1``..``V4``, ``V_l``, ``V_ca``, ``V_k``, ``G_l``,
            ``G_ca``, ``G_k``, ``phi`` and ``offset``.
        :param V: device address (anything ``int()`` accepts) that receives
            the ``initV`` values.
        :param dt: time step; split into ``self.steps`` sub-steps of length
            ``self.ddt``, each roughly 1e-5.
        :param debug: stored unchanged as ``self.debug``.
        :param cuda_verbose: when true, ``self.compile_options`` carries
            ``--ptxas-options=-v``; otherwise it is an empty list.
        """
        self.compile_options = ["--ptxas-options=-v"] if cuda_verbose else []

        self.num_neurons = len(n_dict["id"])
        self.dt = np.double(dt)
        self.steps = max(int(round(dt / 1e-5)), 1)
        self.debug = debug

        self.ddt = dt / self.steps

        self.V = V

        # attribute name -> n_dict key; uploaded as float64 GPU arrays in
        # exactly this order.
        upload_plan = [
            ("n", "initn"),
            ("V_1", "V1"),
            ("V_2", "V2"),
            ("V_3", "V3"),
            ("V_4", "V4"),
            ("V_l", "V_l"),
            ("V_ca", "V_ca"),
            ("V_k", "V_k"),
            ("G_l", "G_l"),
            ("G_ca", "G_ca"),
            ("G_k", "G_k"),
            ("Tphi", "phi"),
            ("offset", "offset"),
        ]
        for target, source_key in upload_plan:
            as_f64 = np.asarray(n_dict[source_key], dtype=np.float64)
            setattr(self, target, garray.to_gpu(as_f64))

        # Seed the externally owned buffer with the initial values.
        dest_address = int(self.V)
        start_values = np.asarray(n_dict["initV"], dtype=np.double)
        cuda.memcpy_htod(dest_address, start_values)
        self.update = self.get_euler_kernel()
开发者ID:neurokernel,项目名称:neurodriver,代码行数:32,代码来源:MorrisLecar_a.py

示例12: __compile_kernels

  def __compile_kernels(self):
    """Borrow the forest's kernel handles and publish the sorted-index pointers.

    Each listed attribute of ``self.forest`` is copied onto this object
    under the same name. The device addresses of the two sorted-index
    arrays are then written, as 64-bit integers, into the globals
    ``sorted_indices_1`` and ``sorted_indices_2`` of the forest's BFS module.
    """
    forest = self.forest

    # DFS module
    dfs_names = (
      "find_min_kernel",
      "fill_kernel",
      "scan_reshuffle_tex",
      "comput_total_2d",
      "reduce_2d",
      "scan_total_2d",
      "scan_reduce",
    )
    # BFS module
    bfs_names = (
      "scan_total_bfs",
      "comput_bfs_2d",
      "fill_bfs",
      "reshuffle_bfs",
      "reduce_bfs_2d",
      "get_thresholds",
    )
    # Other
    other_names = ("predict_kernel", "mark_table")

    for attr_name in dfs_names + bfs_names + other_names:
      setattr(self, attr_name, getattr(forest, attr_name))

    # get_global returns a pair; element 0 is the destination passed to
    # memcpy_htod.
    slot_1 = forest.bfs_module.get_global("sorted_indices_1")[0]
    slot_2 = forest.bfs_module.get_global("sorted_indices_2")[0]
    cuda.memcpy_htod(slot_1, np.uint64(self.sorted_indices_gpu.ptr))
    cuda.memcpy_htod(slot_2, np.uint64(self.sorted_indices_gpu_.ptr))
开发者ID:phecy,项目名称:CudaTree,代码行数:26,代码来源:random_tree.py

示例13: calc_bandwidth_h2d

	def calc_bandwidth_h2d( s ):
		"""Time one host-to-device copy of ``s.a`` and return the bandwidth.

		The result is ``s.nbytes`` divided by the elapsed wall-clock seconds
		and by the module-level ``gbytes`` scale factor.
		"""
		started = datetime.now()
		cuda.memcpy_htod( s.dev_a, s.a )
		elapsed = datetime.now() - started

		# Only the seconds and microseconds fields of the timedelta are used.
		elapsed_sec = elapsed.seconds + elapsed.microseconds*1e-6
		return s.nbytes/elapsed_sec/gbytes
开发者ID:wbkifun,项目名称:fdtd_accelerate,代码行数:7,代码来源:150-gpus-mpi-range-h5-seperate.py

示例14: __compute_guassian_on_pts

    def __compute_guassian_on_pts(self):
        """Accumulate every data set into the GPU grid, one tile at a time.

        Points are first normalised with the view's left/bottom origin and
        width/height. For each (row, col) tile, the points inside the tile
        enlarged by ``3 * sigma`` on every side are uploaded and handed to
        the ``gpu_gaussian`` kernel; the temporary device buffer is freed
        after each launch. Tiles without points are skipped.
        """
        view = self.view_tile.get_View()

        for dset in self.data_sets:
            # Work on a copy so the stored data set is left untouched.
            pts_norm = np.array(dset.getDataSet(), copy=True)
            pts_norm[:, 0] = (pts_norm[:, 0] - view.left)/view.width()
            pts_norm[:, 1] = (pts_norm[:, 1] - view.bottom)/view.height()

            n_rows = self.grid_size[0]
            n_cols = self.grid_size[1]
            tile_w = 1 / float(n_cols)
            tile_h = 1 / float(n_rows)
            # Padding of 3 * sigma added around every tile.
            margin = 3 * self.sigma

            for row in range(n_rows):
                bottom = tile_h * row - margin
                top = tile_h * (row + 1) + margin

                for col in range(n_cols):
                    left = tile_w * col - margin
                    right = tile_w * (col + 1) + margin
                    pts = getFilteredDataSet(pts_norm, (left, right, bottom, top))

                    if len(pts) <= 0:
                        continue

                    self.pts_gpu = cuda.mem_alloc_like(pts)
                    cuda.memcpy_htod(self.pts_gpu, pts)

                    self.gpu_gaussian(
                        self.grid_gpu,               # Grid
                        self.pts_gpu,                # Points
                        np.int32(col),               # Block Index x
                        np.int32(row),               # Block Index y
                        np.int32(self.grid_size[1]), # Grid Dimensions x
                        np.int32(self.grid_size[0]), # Grid Dimensions y
                        np.int32(pts.shape[0]),      # Point Length
                        np.float32(self.dx),         # dx
                        np.float32(self.dy),         # dy
                        np.float32(self.sigma),      # Sigma
                        block=self.block_size)

                    self.pts_gpu.free()
开发者ID:SCIInstitute,项目名称:MLM,代码行数:34,代码来源:gaussian_gpu_grid.py

示例15: interior_buffer

def interior_buffer(source_im, dest_im, b_size, g_size, RGB, neighbors):
    """Run the mask kernel on ``source_im`` and return the resulting buffer.

    The kernel source is generated from the ``mask_source`` Cheetah template,
    compiled, and launched on a device copy of ``source_im``. The device
    buffer is then copied back into a uint8 copy of ``dest_im``.

    :param source_im: host array uploaded to the GPU.
    :param dest_im: host array supplying WIDTH/HEIGHT (``shape[1]`` and
        ``shape[0]``) and the layout of the returned buffer.
    :param b_size: CUDA block dimensions; entries 0 and 1 also fill the
        template's BLOCK_DIM_X / BLOCK_DIM_Y.
    :param g_size: CUDA grid dimensions.
    :param RGB: value substituted for the template variable ``RGB``.
    :param neighbors: value substituted for the template variable
        ``NEIGHBORS``.
    :returns: uint8 array shaped like ``dest_im`` holding the bytes read
        back from the device.
    """
    # NOTE: indentation normalised to spaces; the original body mixed tabs
    # with space+tab prefixes, which Python 3 rejects with TabError.

    # create Cheetah template and fill in variables for mask kernel
    mask_template = Template(mask_source)
    mask_template.BLOCK_DIM_X = b_size[0]
    mask_template.BLOCK_DIM_Y = b_size[1]
    mask_template.WIDTH = dest_im.shape[1]
    mask_template.HEIGHT = dest_im.shape[0]
    mask_template.RGB = RGB
    mask_template.NEIGHBORS = neighbors

    # compile the CUDA kernel
    mask_kernel = cuda_compile(mask_template, "mask_kernel")

    # allocate GPU memory and upload the source image
    d_source = cu.mem_alloc(source_im.nbytes)
    cu.memcpy_htod(d_source, source_im)

    # let the GPU filter out interior points in the mask
    mask_kernel(d_source, block=b_size, grid=g_size)

    # retrieve the interior point buffer from the GPU
    # NOTE(review): assumes the uint8 copy of dest_im has the same byte size
    # as source_im - confirm against callers.
    inner_buffer = np.array(dest_im, dtype=np.uint8)
    cu.memcpy_dtoh(inner_buffer, d_source)

    # return the interior buffer
    return inner_buffer
开发者ID:JMTing,项目名称:cs205,代码行数:26,代码来源:parallel_poisson.py


注:本文中的 pycuda.driver.memcpy_htod 函数示例由纯净天空整理自 GitHub/MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。