当前位置: 首页>>代码示例>>Python>>正文


Python driver.dtype_to_array_format函数代码示例

本文整理汇总了Python中pycuda.driver.dtype_to_array_format函数的典型用法代码示例。如果您正苦于以下问题:Python dtype_to_array_format函数的具体用法?Python dtype_to_array_format怎么用?Python dtype_to_array_format使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了dtype_to_array_format函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: bind_to_texref_ext

    def bind_to_texref_ext(self, texref, channels=1, allow_double_hack=False,
            allow_offset=False):
        """Bind this array's device memory to *texref* and set its format.

        :arg texref: texture reference; ``set_address``, ``set_format``,
            ``get_flags`` and ``set_flags`` are called on it.
        :arg channels: channel count passed to ``set_format``. Must be 1
            when the double hack is used.
        :arg allow_double_hack: when true and the array is ``float64``,
            the texture is declared as two ``SIGNED_INT32`` channels that
            are read as integers.
        :arg allow_offset: forwarded to ``texref.set_address``.
        :returns: the value returned by ``set_address`` divided by the
            item size of the array's dtype.
        :raises RuntimeError: if ``self.flags.forc`` is false.
        :raises ValueError: if the double hack is requested with more
            than one channel.
        """
        if not self.flags.forc:
            raise RuntimeError("only contiguous arrays may "
                    "be used as arguments to this operation")

        use_double_hack = allow_double_hack and self.dtype == np.float64

        if use_double_hack:
            if channels != 1:
                raise ValueError(
                        "'fake' double precision textures can "
                        "only have one channel")

            # One double is presented to the texture unit as a pair of
            # 32-bit signed integers.
            channels = 2
            tex_format = drv.array_format.SIGNED_INT32
            integer_read = True
        else:
            tex_format = drv.dtype_to_array_format(self.dtype)
            integer_read = issubclass(self.dtype.type, np.integer)

        byte_offset = texref.set_address(
                self.gpudata, self.nbytes, allow_offset=allow_offset)
        texref.set_format(tex_format, channels)

        if integer_read:
            # Preserve flags already set on the texture reference.
            current_flags = texref.get_flags()
            texref.set_flags(current_flags | drv.TRSF_READ_AS_INTEGER)

        return byte_offset/self.dtype.itemsize
开发者ID:hannes-brt,项目名称:pycuda,代码行数:26,代码来源:gpuarray.py

示例2: create_3d_texture

def create_3d_texture(a, module, variable, point_sampling=False):
    """Copy a 3-D host array into a CUDA array and attach it to a texture.

    The input is first converted to Fortran (column-major) order, and its
    shape is read as ``(width, height, depth)``.

    Args:
        a: array-like with exactly three dimensions.
        module: object providing ``get_texref(variable)``.
        variable: name of the texture reference to look up in ``module``.
        point_sampling: choose ``filter_mode.POINT`` when true, otherwise
            ``filter_mode.LINEAR``.

    Returns:
        The texture reference with the new CUDA array bound to it.
    """
    host = numpy.asfortranarray(a)
    width, height, depth = host.shape

    desc = cuda.ArrayDescriptor3D()
    desc.width = width
    desc.height = height
    desc.depth = depth
    desc.format = cuda.dtype_to_array_format(host.dtype)
    desc.num_channels = 1
    desc.flags = 0
    device_array = cuda.Array(desc)

    # strides[1] of the Fortran-ordered array is the byte length of one row
    # along the first axis; it serves as both copy width and source pitch.
    row_bytes = host.strides[1]
    transfer = cuda.Memcpy3D()
    transfer.set_src_host(host)
    transfer.set_dst_array(device_array)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = height
    transfer.height = height
    transfer.depth = depth
    transfer()

    texref = module.get_texref(variable)
    texref.set_array(device_array)
    mode = cuda.filter_mode.POINT if point_sampling else cuda.filter_mode.LINEAR
    texref.set_filter_mode(mode)
    return texref
开发者ID:hvcl,项目名称:Vivaldi,代码行数:29,代码来源:texture.py

示例3: gpuArray3DtocudaArray

def gpuArray3DtocudaArray( gpuArray, allowSurfaceBind=False, precision='float' ):
  """Copy a 3-D device array into a newly allocated CUDA array.

  ``gpuArray.shape`` is read as ``(depth, height, width)``.

  Args:
    gpuArray: device array exposing ``shape``, ``dtype``, ``strides`` and
      ``ptr``.
    allowSurfaceBind: when true, the CUDA array is created with the
      ``SURFACE_LDST`` flag.
    precision: ``'float'`` takes the element format from ``gpuArray.dtype``
      with one channel; ``'double'`` declares two ``SIGNED_INT32`` channels
      per element.

  Returns:
    ``(cudaArray, copy3D)``: the CUDA array and the configured ``Memcpy3D``
    object that was used to fill it.

  Exits the process with status 1 (after printing an error) when
  ``precision`` is neither ``'float'`` nor ``'double'``.
  """
  # Validate first so that a bad argument fails before any driver object is
  # created. print(...) with a single string behaves the same on Python 2
  # and 3, and a non-zero status marks the exit as a failure.
  if precision not in ('float', 'double'):
    print("ERROR:  CUDA_ARRAY incompatible precision")
    sys.exit(1)

  d, h, w = gpuArray.shape
  descr3D = cuda.ArrayDescriptor3D()
  descr3D.width = w
  descr3D.height = h
  descr3D.depth = d
  if precision == 'float':
    descr3D.format = cuda.dtype_to_array_format(gpuArray.dtype)
    descr3D.num_channels = 1
  else:
    # 'double': each element is exposed as a pair of 32-bit integers.
    descr3D.format = cuda.array_format.SIGNED_INT32
    descr3D.num_channels = 2
  descr3D.flags = 0
  if allowSurfaceBind:
    descr3D.flags = cuda.array3d_flags.SURFACE_LDST
  cudaArray = cuda.Array(descr3D)

  copy3D = cuda.Memcpy3D()
  copy3D.set_src_device(gpuArray.ptr)
  copy3D.set_dst_array(cudaArray)
  # strides[1] is the byte length of one row of the source array.
  copy3D.width_in_bytes = copy3D.src_pitch = gpuArray.strides[1]
  copy3D.src_height = copy3D.height = h
  copy3D.depth = d
  copy3D()
  return cudaArray, copy3D
开发者ID:bvillasen,项目名称:tools,代码行数:28,代码来源:cudaTools.py

示例4: np3d_to_device_array

def np3d_to_device_array(np_array, allow_surface_bind=True):
    """Allocate a CUDA array matching ``np_array`` and fill it from the host.

    ``np_array.shape`` is read as ``(depth, height, width)``.

    Args:
        np_array: host array with exactly three dimensions.
        allow_surface_bind: when true (the default) the CUDA array is
            created with the ``SURFACE_LDST`` flag, otherwise with flags 0.

    Returns:
        The newly created ``drv.Array`` holding a copy of the data.
    """
    depth, height, width = np_array.shape

    desc = drv.ArrayDescriptor3D()
    desc.width = width
    desc.height = height
    desc.depth = depth
    desc.format = drv.dtype_to_array_format(np_array.dtype)
    desc.num_channels = 1

    array_flags = 0
    if allow_surface_bind:
        array_flags = drv.array3d_flags.SURFACE_LDST
    desc.flags = array_flags

    device_array = drv.Array(desc)

    # One source row is strides[1] bytes long; the same value is used for
    # the copy width and the source pitch.
    row_bytes = np_array.strides[1]
    transfer = drv.Memcpy3D()
    transfer.set_src_host(np_array)
    transfer.set_dst_array(device_array)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = height
    transfer.height = height
    transfer.depth = depth
    transfer()

    return device_array
开发者ID:danielgrassinger,项目名称:yt_new_frontend,代码行数:26,代码来源:cuda.py

示例5: test_3d_texture

    def test_3d_texture(self):
        """Round-trip a random float32 volume through a 3-D texture.

        A Fortran-ordered host array is copied into a CUDA array, bound to
        the ``mtx_tex`` texture, read back by a kernel with one thread per
        element, and compared with the original data.
        """
        # adapted from code by Nicolas Pinto
        shape = (2, 4, 8)
        width, height, depth = shape

        host = np.asarray(
                np.random.randn(*shape),
                dtype=np.float32, order="F")

        desc = drv.ArrayDescriptor3D()
        desc.width = width
        desc.height = height
        desc.depth = depth
        desc.format = drv.dtype_to_array_format(host.dtype)
        desc.num_channels = 1
        desc.flags = 0

        device_array = drv.Array(desc)

        row_bytes = host.strides[1]
        transfer = drv.Memcpy3D()
        transfer.set_src_host(host)
        transfer.set_dst_array(device_array)
        transfer.width_in_bytes = row_bytes
        transfer.src_pitch = row_bytes
        transfer.src_height = height
        transfer.height = height
        transfer.depth = depth

        transfer()

        mod = SourceModule("""
        texture<float, 3, cudaReadModeElementType> mtx_tex;

        __global__ void copy_texture(float *dest)
        {
          int x = threadIdx.x;
          int y = threadIdx.y;
          int z = threadIdx.z;
          int dx = blockDim.x;
          int dy = blockDim.y;
          int i = (z*dy + y)*dx + x;
          dest[i] = tex3D(mtx_tex, x, y, z);
          //dest[i] = x;
        }
        """)

        kernel = mod.get_function("copy_texture")
        texref = mod.get_texref("mtx_tex")

        texref.set_array(device_array)

        # The block dimensions equal the volume shape, so each thread reads
        # exactly one texel.
        result = np.zeros(shape, dtype=np.float32, order="F")
        kernel(drv.Out(result), block=shape, texrefs=[texref])
        assert la.norm(result-host) == 0
开发者ID:davidweichiang,项目名称:pycuda,代码行数:54,代码来源:test_driver.py

示例6: malloc_gpu_arrays

def malloc_gpu_arrays(nx, ny, nz, cex, cey, cez):
    """Allocate the field buffers and coefficient CUDA arrays on the device.

    Prints a one-line summary (rank, grid size, estimated memory), checks
    the grid against the module-level ``Dx``/``Dy`` divisors, then creates
    six zero-initialised single-precision device buffers and three 3-D CUDA
    arrays filled from ``cex``, ``cey`` and ``cez``.

    Args:
        nx, ny, nz: grid dimensions.
        cex, cey, cez: host coefficient arrays copied into the CUDA arrays.
            NOTE(review): presumably float32 arrays of shape (nx, ny, nz),
            matching the copy geometry derived from ``f`` - confirm with
            the caller.

    Returns:
        ``(eh_fields, tex_fields)``: the list of six device buffers
        ``[ex, ey, ez, hx, hy, hz]`` and the list of three CUDA arrays
        ``[tcex, tcey, tcez]``.

    Exits the process with status 1 if ``nz`` is not a multiple of ``Dx``
    or ``nx * ny`` is not a multiple of ``Dy``.
    """
    # 4 bytes per float32 element, nine arrays in total (6 fields + 3
    # coefficient arrays).
    total_bytes = nx * ny * nz * 4 * 9
    # Explicit comparison and floor division keep the MB/GB choice
    # independent of whether "/" is integer or true division.
    if total_bytes < 1024 ** 3:
        size_text = "%d MB" % (total_bytes // (1024 ** 2))
    else:
        size_text = "%1.2f GB" % (float(total_bytes) / (1024 ** 3))
    # A single print(...) of one string works on both Python 2 and 3.
    print("rank= %d, (%d, %d, %d) %s" % (rank, nx, ny, nz, size_text))

    if nz % Dx != 0:
        print("Error: nz is not multiple of %d" % (Dx))
        sys.exit(1)
    if (nx * ny) % Dy != 0:
        print("Error: nx*ny is not multiple of %d" % (Dy))
        sys.exit(1)

    f = np.zeros((nx, ny, nz), "f")
    # Six independent device copies of the zero array: ex, ey, ez, hx, hy, hz.
    eh_fields = [cuda.to_device(f) for _ in range(6)]

    descr = cuda.ArrayDescriptor3D()
    descr.width = nz
    descr.height = ny
    descr.depth = nx
    descr.format = cuda.dtype_to_array_format(f.dtype)
    descr.num_channels = 1
    descr.flags = 0
    tex_fields = [cuda.Array(descr) for _ in range(3)]

    mcopy = cuda.Memcpy3D()
    # f.strides[1] is the byte length of one row of nz elements.
    mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
    mcopy.src_height = mcopy.height = ny
    mcopy.depth = nx

    # The same copy descriptor is reused for each coefficient array.
    for host_coeff, device_array in zip((cex, cey, cez), tex_fields):
        mcopy.set_src_host(host_coeff)
        mcopy.set_dst_array(device_array)
        mcopy()

    return eh_fields, tex_fields
开发者ID:wbkifun,项目名称:fdtd_accelerate,代码行数:53,代码来源:150-nGPU-func.py

示例7: gpuArray2DtocudaArray

def gpuArray2DtocudaArray( gpuArray ):
  """Copy a 2-D device array into a newly allocated CUDA array.

  ``gpuArray.shape`` is read as ``(height, width)``.

  Args:
    gpuArray: device array exposing ``shape``, ``dtype``, ``strides`` and
      ``ptr``.

  Returns:
    ``(cudaArray, copy2D)``: the CUDA array and the configured ``Memcpy2D``
    object that was used to fill it.
  """
  h, w = gpuArray.shape
  descr2D = cuda.ArrayDescriptor()
  descr2D.width = w
  descr2D.height = h
  descr2D.format = cuda.dtype_to_array_format(gpuArray.dtype)
  descr2D.num_channels = 1
  cudaArray = cuda.Array(descr2D)

  copy2D = cuda.Memcpy2D()
  copy2D.set_src_device(gpuArray.ptr)
  copy2D.set_dst_array(cudaArray)
  # strides[0] is the byte length of one source row; it is both the copy
  # width and the source pitch (previously src_pitch was assigned twice).
  copy2D.width_in_bytes = copy2D.src_pitch = gpuArray.strides[0]
  copy2D.src_height = copy2D.height = h
  copy2D(aligned=True)
  return cudaArray, copy2D
开发者ID:bvillasen,项目名称:tools,代码行数:17,代码来源:cudaTools.py

示例8: np2DtoCudaArray

def np2DtoCudaArray( npArray, allowSurfaceBind=False ):
  """Copy a 2-D host array into a newly allocated CUDA array.

  ``npArray.shape`` is read as ``(height, width)``.

  Args:
    npArray: host array with exactly two dimensions.
    allowSurfaceBind: when true, ``SURFACE_LDST`` is stored in the
      descriptor's ``flags`` attribute before the array is created.

  Returns:
    ``(cudaArray, descr2D)``: the CUDA array and the descriptor it was
    created from.
  """
  h, w = npArray.shape
  descr2D = cuda.ArrayDescriptor()
  descr2D.width = w
  descr2D.height = h
  descr2D.format = cuda.dtype_to_array_format(npArray.dtype)
  descr2D.num_channels = 1
  if allowSurfaceBind:
    # Fixed: this used the undefined name `descr`, which raised NameError
    # whenever surface binding was requested.
    # NOTE(review): confirm that the 2-D ArrayDescriptor honours `flags`;
    # a surface-bindable array may need ArrayDescriptor3D instead.
    descr2D.flags = cuda.array3d_flags.SURFACE_LDST
  cudaArray = cuda.Array(descr2D)

  copy2D = cuda.Memcpy2D()
  copy2D.set_src_host(npArray)
  copy2D.set_dst_array(cudaArray)
  # strides[0] is the byte length of one source row; it is both the copy
  # width and the source pitch (previously src_pitch was assigned twice).
  copy2D.width_in_bytes = copy2D.src_pitch = npArray.strides[0]
  copy2D.src_height = copy2D.height = h
  copy2D(aligned=True)
  return cudaArray, descr2D
开发者ID:bvillasen,项目名称:tools,代码行数:19,代码来源:cudaTools.py

示例9: bind_to_texref_ext

    def bind_to_texref_ext(self, texref, channels=1, allow_double_hack=False, 
            allow_offset=False):
        """Bind this array's device memory to *texref* and set its format.

        :arg texref: texture reference; ``set_address``, ``set_format``,
            ``get_flags`` and ``set_flags`` are called on it.
        :arg channels: channel count passed to ``set_format``. Must be 1
            when the double hack is used.
        :arg allow_double_hack: when true and the array is ``float64``,
            the texture is declared as two ``SIGNED_INT32`` channels that
            are read as integers.
        :arg allow_offset: forwarded to ``texref.set_address``.
        :returns: the value returned by ``set_address`` divided by the
            item size of the array's dtype.
        :raises ValueError: if the double hack is requested with more
            than one channel.
        """
        if self.dtype == numpy.float64 and allow_double_hack:
            if channels != 1:
                # Call form of raise: the old "raise E, msg" statement is a
                # syntax error on Python 3.
                raise ValueError(
                        "'fake' double precision textures can only have one channel")

            # One double is presented to the texture unit as a pair of
            # 32-bit signed integers.
            channels = 2
            fmt = drv.array_format.SIGNED_INT32
            read_as_int = True
        else:
            fmt = drv.dtype_to_array_format(self.dtype)
            read_as_int = numpy.integer in self.dtype.type.__mro__

        offset = texref.set_address(self.gpudata, self.nbytes, allow_offset=allow_offset)
        texref.set_format(fmt, channels)

        if read_as_int:
            # Preserve flags already set on the texture reference.
            texref.set_flags(texref.get_flags() | drv.TRSF_READ_AS_INTEGER)

        return offset/self.dtype.itemsize
开发者ID:minrk,项目名称:PyCUDA,代码行数:20,代码来源:gpuarray.py

示例10: np3DtoCudaArray

def np3DtoCudaArray( npArray, allowSurfaceBind=False ):
  """Copy a 3-D host array into a newly allocated CUDA array.

  ``npArray.shape`` is read as ``(depth, height, width)``.

  Args:
    npArray: host array with exactly three dimensions.
    allowSurfaceBind: when true, the CUDA array is created with the
      ``SURFACE_LDST`` flag, otherwise with flags 0.

  Returns:
    The newly created CUDA array.
  """
  #import pycuda.autoinit
  depth, height, width = npArray.shape

  desc = cuda.ArrayDescriptor3D()
  desc.width = width
  desc.height = height
  desc.depth = depth
  desc.format = cuda.dtype_to_array_format(npArray.dtype)
  desc.num_channels = 1
  arrayFlags = 0
  if allowSurfaceBind:
    arrayFlags = cuda.array3d_flags.SURFACE_LDST
  desc.flags = arrayFlags
  deviceArray = cuda.Array(desc)

  # One source row is strides[1] bytes long; the same value is used for the
  # copy width and the source pitch.
  rowBytes = npArray.strides[1]
  transfer = cuda.Memcpy3D()
  transfer.set_src_host(npArray)
  transfer.set_dst_array(deviceArray)
  transfer.width_in_bytes = rowBytes
  transfer.src_pitch = rowBytes
  transfer.src_height = height
  transfer.height = height
  transfer.depth = depth
  transfer()
  return deviceArray
开发者ID:bvillasen,项目名称:tools,代码行数:21,代码来源:cudaTools.py

示例11: _prepare_F_texture

 def _prepare_F_texture(self):
     """Create a cubic CUDA array from ``self.F_gpu`` and bind it to ``self.F_texture``.

     The array has ``self.side`` elements along each axis. The configured
     ``Memcpy3D`` object is stored as ``self.F_gpu_to_array_copy`` and is
     executed once here before the array is attached to the texture.
     """
     desc = drv.ArrayDescriptor3D()
     edge = self.side
     desc.width = edge
     desc.height = edge
     desc.depth = edge
     desc.format = drv.dtype_to_array_format(self.F_gpu.dtype)
     desc.num_channels = 1
     desc.flags = 0

     texture_array = drv.Array(desc)

     # strides[1] of the device volume is used as both the copy width and
     # the source pitch.
     row_bytes = self.F_gpu.strides[1]
     transfer = drv.Memcpy3D()
     transfer.set_src_device(self.F_gpu.gpudata)
     transfer.set_dst_array(texture_array)
     transfer.width_in_bytes = row_bytes
     transfer.src_pitch = row_bytes
     transfer.src_height = edge
     transfer.height = edge
     transfer.depth = edge

     # Keep the copy descriptor on the instance, then run it.
     self.F_gpu_to_array_copy = transfer
     self.F_gpu_to_array_copy()
     self.F_texture.set_array(texture_array)
开发者ID:caomw,项目名称:FreenectFusion,代码行数:22,代码来源:dense.py

示例12: numpy3d_to_array

def numpy3d_to_array(np_array, order=None):
    '''
    Copy a 3-D numpy array into a newly allocated CUDA array.

    If you get a buffer error, run this function on np_array.copy('F').

    :param np_array: host array with exactly three dimensions.
    :param order: 'C' (shape read as depth, height, width) or 'F' (shape
        read as width, height, depth), case-insensitive. When None, 'C' is
        chosen if the first stride is larger than the last, otherwise 'F'.
    :returns: the CUDA array holding a copy of the data.
    :raises ValueError: if ``order`` is neither 'C' nor 'F'.
    '''
    if order is None:
        order = 'C' if np_array.strides[0] > np_array.strides[2] else 'F'

    layout = order.upper()
    if layout == 'C':
        d, h, w = np_array.shape
    elif layout == 'F':
        w, h, d = np_array.shape
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("order must be either F or C")

    # Imported only after the arguments are validated, so a bad `order` is
    # reported without needing the CUDA driver module.
    from pycuda.driver import Array, ArrayDescriptor3D, Memcpy3D, dtype_to_array_format

    descr = ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = dtype_to_array_format(np_array.dtype)
    descr.num_channels = 1
    descr.flags = 0

    device_array = Array(descr)

    copy = Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    # strides[1] is used as both the copy width and the source pitch.
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d

    copy()

    return device_array
开发者ID:clhuang,项目名称:br_renderer,代码行数:37,代码来源:__init__.py

示例13: alloc_coeff_arrays

	def alloc_coeff_arrays(s):
		"""Build the three coefficient volumes and upload them as CUDA arrays.

		Each of ``s.cex``, ``s.cey`` and ``s.cez`` is a single-precision
		``(nx, ny, nz)`` array filled with 0.5 and zeroed on the last index
		of two axes. The matching CUDA arrays are stored as ``s.tcex_gpu``,
		``s.tcey_gpu`` and ``s.tcez_gpu``.
		"""
		template = np.zeros((s.nx, s.ny, s.nz), 'f')

		# cex: zero on the last y and last z planes.
		s.cex = np.ones_like(template)*0.5
		s.cex[:, -1] = 0
		s.cex[..., -1] = 0
		# cey: zero on the last z and last x planes.
		s.cey = np.ones_like(template)*0.5
		s.cey[..., -1] = 0
		s.cey[-1] = 0
		# cez: zero on the last x and last y planes.
		s.cez = np.ones_like(template)*0.5
		s.cez[-1] = 0
		s.cez[:, -1] = 0

		desc = cuda.ArrayDescriptor3D()
		desc.width = s.nz
		desc.height = s.ny
		desc.depth = s.nx
		desc.format = cuda.dtype_to_array_format(template.dtype)
		desc.num_channels = 1
		desc.flags = 0
		s.tcex_gpu = cuda.Array(desc)
		s.tcey_gpu = cuda.Array(desc)
		s.tcez_gpu = cuda.Array(desc)

		# One copy descriptor is reused for all three uploads; the row size
		# comes from the template array's second stride.
		row_bytes = template.strides[1]
		transfer = cuda.Memcpy3D()
		transfer.width_in_bytes = row_bytes
		transfer.src_pitch = row_bytes
		transfer.src_height = s.ny
		transfer.height = s.ny
		transfer.depth = s.nx
		uploads = (
			(s.cex, s.tcex_gpu),
			(s.cey, s.tcey_gpu),
			(s.cez, s.tcez_gpu),
		)
		for host_coeff, device_array in uploads:
			transfer.set_src_host(host_coeff)
			transfer.set_dst_array(device_array)
			transfer()
开发者ID:wbkifun,项目名称:fdtd_accelerate,代码行数:36,代码来源:140-3GPU-class.py

示例14:

	# Allocate device memory. `f` is a zero-filled single-precision
	# (nx, ny, nz) host array in Fortran order, used as the template for
	# every buffer below.
	f = np.zeros((nx,ny,nz),'f',order='F')

	# Six device buffers, each initialised from `f`: the e and h field
	# components.
	ex_gpu = cuda.to_device(f)
	ey_gpu = cuda.to_device(f)
	ez_gpu = cuda.to_device(f)
	hx_gpu = cuda.to_device(f)
	hy_gpu = cuda.to_device(f)
	hz_gpu = cuda.to_device(f)

	# Descriptor for single-channel 3-D CUDA arrays; the first axis of `f`
	# maps to width, the last to depth.
	descr = cuda.ArrayDescriptor3D()
	descr.width = nx
	descr.height = ny
	descr.depth = nz
	descr.format = cuda.dtype_to_array_format(f.dtype)
	descr.num_channels = 1
	descr.flags = 0

	# Six CUDA arrays created from the same descriptor, one per coefficient
	# set.
	cex_gpu = cuda.Array(descr)
	cey_gpu = cuda.Array(descr)
	cez_gpu = cuda.Array(descr)
	chx_gpu = cuda.Array(descr)
	chy_gpu = cuda.Array(descr)
	chz_gpu = cuda.Array(descr)

	# Copy geometry only: f.strides[1] is used as both the row width and the
	# source pitch. Source and destination are not set in this fragment.
	mcopy = cuda.Memcpy3D()
	mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
	mcopy.src_height = mcopy.height = ny
	mcopy.depth = nz
开发者ID:wbkifun,项目名称:fdtd_accelerate,代码行数:29,代码来源:240-block3d-grid2d-texture.py

示例15: get_kernel


#.........这里部分代码省略.........
                    self.write_interior_flux_code(True))
                ),
            Line(),
            Comment("work around nvcc assertion failure"),
            S("fpair_nr+=1"),
            S("fpair_nr-=1"),
            Line(),
            Comment("fluxes for single-sided (inter-block) interior face pairs"),
            While("fpair_nr < data.header.diff_facepairs_end",
                get_flux_code(lambda:
                    self.write_interior_flux_code(False))
                ),
            Line(),
            Comment("fluxes for single-sided boundary face pairs"),
            While("fpair_nr < data.header.bdry_facepairs_end",
                get_flux_code(
                    lambda: self.write_boundary_flux_code(for_benchmark))
                ),
            ])

        f_body.extend_log_block("compute the fluxes", [
            Initializer(POD(numpy.uint32, "fpair_nr"), "BLOCK_FACE"),
            If("FACEDOF_NR < DOFS_PER_FACE", flux_computation)
            ])

        if not fplan.direct_store:
            f_body.extend([
                Line(),
                S("__syncthreads()"),
                Line()
                ])

            f_body.extend_log_block("store fluxes", [
                    #Assign("debugbuf[blockIdx.x]", "FOF_BLOCK_BASE"),
                    #Assign("debugbuf[0]", "FOF_BLOCK_BASE"),
                    #Assign("debugbuf[0]", "sizeof(face_pair)"),
                    For("unsigned word_nr = THREAD_NUM",
                        "word_nr < ALIGNED_FACE_DOFS_PER_MB*BLOCK_MB_COUNT",
                        "word_nr += COALESCING_THREAD_COUNT",
                        Block([Assign(
                            "gmem_fluxes_on_faces%d[FOF_BLOCK_BASE+word_nr]" % flux_nr,
                            "smem_fluxes_on_faces[%d][word_nr]" % flux_nr)
                            for flux_nr in range(len(self.fluxes))]
                           #+[If("isnan(smem_fluxes_on_faces[%d][word_nr])" % flux_nr,
                               #Block([
                                   #Assign("debugbuf[blockIdx.x]", "word_nr"),
                                   #])
                               #)
                            #for flux_nr in range(len(self.fluxes))]
                        )
                    )
                    ])
        if False:
            f_body.extend([
                    Assign("debugbuf[blockIdx.x*96+32+BLOCK_FACE*32+threadIdx.x]", "fpair_nr"),
                    Assign("debugbuf[blockIdx.x*96+16]", "data.header.same_facepairs_end"),
                    Assign("debugbuf[blockIdx.x*96+17]", "data.header.diff_facepairs_end"),
                    Assign("debugbuf[blockIdx.x*96+18]", "data.header.bdry_facepairs_end"),
                    ]
                    )

        # finish off ----------------------------------------------------------
        cmod.append(FunctionBody(f_decl, f_body))

        if not for_benchmark and "cuda_dump_kernels" in discr.debug:
            from hedge.tools import open_unique_debug_file
            open_unique_debug_file("flux_gather", ".cu").write(str(cmod))

        #from pycuda.tools import allow_user_edit
        mod = SourceModule(
                #allow_user_edit(cmod, "kernel.cu", "the flux kernel"),
                cmod,
                keep="cuda_keep_kernels" in discr.debug)
        expr_to_texture_map = dict(
                (dep_expr, mod.get_texref(
                    "field%d_tex" % self.dep_to_index[dep_expr]))
                for dep_expr in self.all_deps)

        index_list_texref = mod.get_texref("tex_index_lists")
        index_list_texref.set_address(
                ilist_data.device_memory,
                ilist_data.bytes)
        index_list_texref.set_format(
                cuda.dtype_to_array_format(ilist_data.type), 1)
        index_list_texref.set_flags(cuda.TRSF_READ_AS_INTEGER)

        func = mod.get_function("apply_flux")
        block = (fplan.threads_per_face(), fplan.parallel_faces, 1)
        func.prepare(
                (2+len(self.fluxes))*"P",
                texrefs=expr_to_texture_map.values()
                + [index_list_texref])

        if "cuda_flux" in discr.debug:
            print "flux: lmem=%d smem=%d regs=%d" % (
                    func.local_size_bytes,
                    func.shared_size_bytes,
                    func.num_regs)

        return block, func, expr_to_texture_map
开发者ID:felipeh,项目名称:hedge,代码行数:101,代码来源:fluxgather.py


注:本文中的pycuda.driver.dtype_to_array_format函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。