本文整理汇总了Python中pycuda.driver.dtype_to_array_format函数的典型用法代码示例。如果您正苦于以下问题：Python dtype_to_array_format函数的具体用法？Python dtype_to_array_format怎么用？Python dtype_to_array_format使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dtype_to_array_format函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: bind_to_texref_ext
def bind_to_texref_ext(self, texref, channels=1, allow_double_hack=False,
                       allow_offset=False):
    """Bind this array's device memory to *texref* and set its texel format.

    Parameters:
        texref: texture reference; ``set_address`` and ``set_format`` are
            called on it, plus ``set_flags`` when the data is read as
            integers.
        channels: channel count handed to ``texref.set_format``. Must be 1
            when the double-precision hack applies.
        allow_double_hack: when true and the array dtype is ``float64``, the
            data is exposed as two ``SIGNED_INT32`` channels read as
            integers.
        allow_offset: forwarded unchanged to ``texref.set_address``.

    Returns:
        The value returned by ``texref.set_address`` floor-divided by the
        dtype's item size (presumably a byte offset converted to a count of
        elements -- confirm against the PyCUDA documentation).

    Raises:
        RuntimeError: if ``self.flags.forc`` is false.
        ValueError: if the double hack applies and ``channels`` is not 1.
    """
    if not self.flags.forc:
        raise RuntimeError("only contiguous arrays may "
                           "be used as arguments to this operation")

    if self.dtype == np.float64 and allow_double_hack:
        if channels != 1:
            raise ValueError(
                "'fake' double precision textures can "
                "only have one channel")
        # One double is presented to the texture unit as a pair of int32s.
        channels = 2
        fmt = drv.array_format.SIGNED_INT32
        read_as_int = True
    else:
        fmt = drv.dtype_to_array_format(self.dtype)
        read_as_int = issubclass(self.dtype.type, np.integer)

    offset = texref.set_address(self.gpudata, self.nbytes,
                                allow_offset=allow_offset)
    texref.set_format(fmt, channels)

    if read_as_int:
        texref.set_flags(texref.get_flags() | drv.TRSF_READ_AS_INTEGER)

    # Floor division keeps the result an integer on Python 3 as well; plain
    # "/" would turn the offset into a float there.
    return offset // self.dtype.itemsize
示例2: create_3d_texture
def create_3d_texture(a, module, variable, point_sampling=False):
    """Upload *a* into a 3D CUDA array and bind it to a texture reference.

    *a* is first converted with ``numpy.asfortranarray``; its shape is then
    unpacked as ``(width, height, depth)``. The texture reference is fetched
    from *module* under the name *variable*.

    Parameters:
        a: array-like with three dimensions.
        module: object providing ``get_texref``.
        variable: name of the texture reference inside *module*.
        point_sampling: select ``filter_mode.POINT`` when true, otherwise
            ``filter_mode.LINEAR``.

    Returns:
        The texture reference with the new CUDA array attached.
    """
    host = numpy.asfortranarray(a)
    width, height, depth = host.shape

    desc = cuda.ArrayDescriptor3D()
    desc.width = width
    desc.height = height
    desc.depth = depth
    desc.format = cuda.dtype_to_array_format(host.dtype)
    desc.num_channels = 1
    desc.flags = 0
    device_array = cuda.Array(desc)

    # strides[1] of the Fortran-ordered array is the byte length of one row.
    row_bytes = host.strides[1]
    transfer = cuda.Memcpy3D()
    transfer.set_src_host(host)
    transfer.set_dst_array(device_array)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = height
    transfer.height = height
    transfer.depth = depth
    transfer()

    texref = module.get_texref(variable)
    texref.set_array(device_array)
    mode = cuda.filter_mode.POINT if point_sampling else cuda.filter_mode.LINEAR
    texref.set_filter_mode(mode)
    return texref
示例3: gpuArray3DtocudaArray
def gpuArray3DtocudaArray(gpuArray, allowSurfaceBind=False, precision='float'):
    """Copy a 3D device array into a newly created CUDA array.

    The shape of *gpuArray* is unpacked as ``(depth, height, width)``.

    Parameters:
        gpuArray: device array exposing ``shape``, ``dtype``, ``strides``
            and ``ptr``.
        allowSurfaceBind: when true the descriptor flags are set to
            ``array3d_flags.SURFACE_LDST``; otherwise they are 0.
        precision: ``'float'`` derives the format from ``gpuArray.dtype``
            with one channel; ``'double'`` uses two ``SIGNED_INT32``
            channels.

    Returns:
        ``(cudaArray, copy3D)``: the CUDA array and the configured
        ``Memcpy3D`` object that was used to fill it.

    Raises:
        ValueError: if *precision* is neither ``'float'`` nor ``'double'``.
    """
    # Validate before touching the driver. The previous behaviour printed a
    # message and called sys.exit() with status 0, which made the failure
    # look like success to the calling process.
    if precision not in ('float', 'double'):
        raise ValueError(
            "CUDA_ARRAY incompatible precision: %r "
            "(expected 'float' or 'double')" % (precision,))

    d, h, w = gpuArray.shape
    descr3D = cuda.ArrayDescriptor3D()
    descr3D.width = w
    descr3D.height = h
    descr3D.depth = d
    if precision == 'float':
        descr3D.format = cuda.dtype_to_array_format(gpuArray.dtype)
        descr3D.num_channels = 1
    else:
        # 'double': each value is stored as a pair of 32-bit integers.
        descr3D.format = cuda.array_format.SIGNED_INT32
        descr3D.num_channels = 2
    descr3D.flags = cuda.array3d_flags.SURFACE_LDST if allowSurfaceBind else 0
    cudaArray = cuda.Array(descr3D)

    copy3D = cuda.Memcpy3D()
    copy3D.set_src_device(gpuArray.ptr)
    copy3D.set_dst_array(cudaArray)
    copy3D.width_in_bytes = copy3D.src_pitch = gpuArray.strides[1]
    copy3D.src_height = copy3D.height = h
    copy3D.depth = d
    copy3D()
    return cudaArray, copy3D
示例4: np3d_to_device_array
def np3d_to_device_array(np_array, allow_surface_bind=True):
    """Copy a 3D host array into a newly created CUDA array.

    The shape of *np_array* is unpacked as ``(depth, height, width)``.

    Parameters:
        np_array: host array with three dimensions.
        allow_surface_bind: when true the descriptor flags are set to
            ``array3d_flags.SURFACE_LDST``; otherwise they are 0.

    Returns:
        The CUDA array holding the copied data.
    """
    depth, height, width = np_array.shape

    desc = drv.ArrayDescriptor3D()
    desc.width = width
    desc.height = height
    desc.depth = depth
    desc.format = drv.dtype_to_array_format(np_array.dtype)
    desc.num_channels = 1
    desc.flags = drv.array3d_flags.SURFACE_LDST if allow_surface_bind else 0
    result = drv.Array(desc)

    # assumes strides[1] is the byte length of one row (C-contiguous input)
    # -- TODO confirm with callers
    row_bytes = np_array.strides[1]
    transfer = drv.Memcpy3D()
    transfer.set_src_host(np_array)
    transfer.set_dst_array(result)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = height
    transfer.height = height
    transfer.depth = depth
    transfer()
    return result
示例5: test_3d_texture
def test_3d_texture(self):
    """Round-trip a random float32 volume through a 3D texture.

    A Fortran-ordered host array is uploaded into a CUDA array, bound to the
    ``mtx_tex`` texture of a small kernel, read back element by element, and
    compared against the original data.
    """
    # adapted from code by Nicolas Pinto
    w, h, d = 2, 4, 8
    shape = (w, h, d)

    host = np.asarray(
        np.random.randn(*shape),
        dtype=np.float32, order="F")

    desc = drv.ArrayDescriptor3D()
    desc.width = w
    desc.height = h
    desc.depth = d
    desc.format = drv.dtype_to_array_format(host.dtype)
    desc.num_channels = 1
    desc.flags = 0
    device_array = drv.Array(desc)

    row_bytes = host.strides[1]
    transfer = drv.Memcpy3D()
    transfer.set_src_host(host)
    transfer.set_dst_array(device_array)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = h
    transfer.height = h
    transfer.depth = d
    transfer()

    mod = SourceModule("""
texture<float, 3, cudaReadModeElementType> mtx_tex;
__global__ void copy_texture(float *dest)
{
int x = threadIdx.x;
int y = threadIdx.y;
int z = threadIdx.z;
int dx = blockDim.x;
int dy = blockDim.y;
int i = (z*dy + y)*dx + x;
dest[i] = tex3D(mtx_tex, x, y, z);
//dest[i] = x;
}
""")

    kernel = mod.get_function("copy_texture")
    texref = mod.get_texref("mtx_tex")
    texref.set_array(device_array)

    # One thread per element: the block dimensions equal the array shape.
    result = np.zeros(shape, dtype=np.float32, order="F")
    kernel(drv.Out(result), block=shape, texrefs=[texref])
    assert la.norm(result-host) == 0
示例6: malloc_gpu_arrays
def malloc_gpu_arrays(nx, ny, nz, cex, cey, cez):
    """Allocate the field buffers and coefficient CUDA arrays on the device.

    Reads the module-level names ``rank``, ``Dx`` and ``Dy``.

    Parameters:
        nx, ny, nz: grid dimensions; ``nz`` must be a multiple of ``Dx`` and
            ``nx * ny`` a multiple of ``Dy``.
        cex, cey, cez: host arrays copied into the three coefficient CUDA
            arrays (assumed to share the shape and dtype of the
            ``(nx, ny, nz)`` float32 template -- TODO confirm with callers).

    Returns:
        ``(eh_fields, tex_fields)``: a list of six zero-initialised device
        buffers and a list of the three coefficient CUDA arrays.

    Raises:
        SystemExit: with the error message when a divisibility requirement
            is not met.
    """
    # 4 bytes per float32 value, 9 arrays in total (six fields plus three
    # coefficient arrays).
    total_bytes = nx * ny * nz * 4 * 9
    # Floor division keeps the "less than 1 GB" test working on Python 3,
    # where "/" would make it true only for a size of exactly zero.
    if total_bytes // (1024 ** 3) == 0:
        size_text = "%d MB" % (total_bytes // (1024 ** 2))
    else:
        size_text = "%1.2f GB" % (float(total_bytes) / (1024 ** 3))
    print("rank= %d, (%d, %d, %d) %s" % (rank, nx, ny, nz, size_text))

    # sys.exit(message) writes the message to stderr and exits with status
    # 1, so a failed precondition is no longer reported as success.
    if nz % Dx != 0:
        sys.exit("Error: nz is not multiple of %d" % (Dx))
    if (nx * ny) % Dy != 0:
        sys.exit("Error: nx*ny is not multiple of %d" % (Dy))

    f = np.zeros((nx, ny, nz), "f")

    # Order: ex, ey, ez, hx, hy, hz.
    eh_fields = [cuda.to_device(f) for _ in range(6)]

    descr = cuda.ArrayDescriptor3D()
    descr.width = nz
    descr.height = ny
    descr.depth = nx
    descr.format = cuda.dtype_to_array_format(f.dtype)
    descr.num_channels = 1
    descr.flags = 0

    # Order: tcex, tcey, tcez.
    tex_fields = [cuda.Array(descr) for _ in range(3)]

    mcopy = cuda.Memcpy3D()
    mcopy.width_in_bytes = mcopy.src_pitch = f.strides[1]
    mcopy.src_height = mcopy.height = ny
    mcopy.depth = nx
    # The same transfer descriptor is reused for all three coefficients.
    for host_coeff, device_array in zip((cex, cey, cez), tex_fields):
        mcopy.set_src_host(host_coeff)
        mcopy.set_dst_array(device_array)
        mcopy()

    return eh_fields, tex_fields
示例7: gpuArray2DtocudaArray
def gpuArray2DtocudaArray(gpuArray):
    """Copy a 2D device array into a newly created CUDA array.

    The shape of *gpuArray* is unpacked as ``(height, width)``.

    Parameters:
        gpuArray: device array exposing ``shape``, ``dtype``, ``strides``
            and ``ptr``.

    Returns:
        ``(cudaArray, copy2D)``: the CUDA array and the configured
        ``Memcpy2D`` object that was used to fill it.
    """
    h, w = gpuArray.shape

    descr2D = cuda.ArrayDescriptor()
    descr2D.width = w
    descr2D.height = h
    descr2D.format = cuda.dtype_to_array_format(gpuArray.dtype)
    descr2D.num_channels = 1
    cudaArray = cuda.Array(descr2D)

    copy2D = cuda.Memcpy2D()
    copy2D.set_src_device(gpuArray.ptr)
    copy2D.set_dst_array(cudaArray)
    # strides[0] is the byte length of one source row; it serves both as the
    # pitch and as the number of bytes copied per row.
    copy2D.width_in_bytes = copy2D.src_pitch = gpuArray.strides[0]
    copy2D.src_height = copy2D.height = h
    copy2D(aligned=True)
    return cudaArray, copy2D
示例8: np2DtoCudaArray
def np2DtoCudaArray(npArray, allowSurfaceBind=False):
    """Copy a 2D host array into a newly created CUDA array.

    The shape of *npArray* is unpacked as ``(height, width)``.

    Parameters:
        npArray: host array with two dimensions.
        allowSurfaceBind: when true the array is created with the
            ``array3d_flags.SURFACE_LDST`` flag.

    Returns:
        ``(cudaArray, descr2D)``: the CUDA array and the descriptor it was
        created from. With ``allowSurfaceBind`` the descriptor is an
        ``ArrayDescriptor3D`` whose depth is 0.
    """
    h, w = npArray.shape

    if allowSurfaceBind:
        # The original assigned to an undefined name ("descr") here and so
        # raised NameError. The surface flag belongs to the 3D descriptor;
        # per the CUDA driver API a 3D descriptor with depth 0 describes a
        # 2D array.
        descr2D = cuda.ArrayDescriptor3D()
        descr2D.depth = 0
        descr2D.flags = cuda.array3d_flags.SURFACE_LDST
    else:
        descr2D = cuda.ArrayDescriptor()
    descr2D.width = w
    descr2D.height = h
    descr2D.format = cuda.dtype_to_array_format(npArray.dtype)
    descr2D.num_channels = 1
    cudaArray = cuda.Array(descr2D)

    copy2D = cuda.Memcpy2D()
    copy2D.set_src_host(npArray)
    copy2D.set_dst_array(cudaArray)
    # strides[0] is the byte length of one source row; it serves both as the
    # pitch and as the number of bytes copied per row.
    copy2D.width_in_bytes = copy2D.src_pitch = npArray.strides[0]
    copy2D.src_height = copy2D.height = h
    copy2D(aligned=True)
    return cudaArray, descr2D
示例9: bind_to_texref_ext
def bind_to_texref_ext(self, texref, channels=1, allow_double_hack=False,
                       allow_offset=False):
    """Bind this array's device memory to *texref* and set its texel format.

    Parameters:
        texref: texture reference; ``set_address`` and ``set_format`` are
            called on it, plus ``set_flags`` when the data is read as
            integers.
        channels: channel count handed to ``texref.set_format``. Must be 1
            when the double-precision hack applies.
        allow_double_hack: when true and the array dtype is ``float64``, the
            data is exposed as two ``SIGNED_INT32`` channels read as
            integers.
        allow_offset: forwarded unchanged to ``texref.set_address``.

    Returns:
        The value returned by ``texref.set_address`` floor-divided by the
        dtype's item size (presumably a byte offset converted to a count of
        elements -- confirm against the PyCUDA documentation).

    Raises:
        ValueError: if the double hack applies and ``channels`` is not 1.
    """
    if self.dtype == numpy.float64 and allow_double_hack:
        if channels != 1:
            # Call syntax instead of the Python 2-only "raise E, msg" form.
            raise ValueError(
                "'fake' double precision textures can only have one channel")
        # One double is presented to the texture unit as a pair of int32s.
        channels = 2
        fmt = drv.array_format.SIGNED_INT32
        read_as_int = True
    else:
        fmt = drv.dtype_to_array_format(self.dtype)
        read_as_int = issubclass(self.dtype.type, numpy.integer)

    offset = texref.set_address(self.gpudata, self.nbytes,
                                allow_offset=allow_offset)
    texref.set_format(fmt, channels)

    if read_as_int:
        texref.set_flags(texref.get_flags() | drv.TRSF_READ_AS_INTEGER)

    # "//" reproduces the integer result that Python 2's "/" gave here and
    # keeps it an integer on Python 3.
    return offset // self.dtype.itemsize
示例10: np3DtoCudaArray
def np3DtoCudaArray(npArray, allowSurfaceBind=False):
    """Copy a 3D host array into a newly created CUDA array.

    The shape of *npArray* is unpacked as ``(depth, height, width)``.

    Parameters:
        npArray: host array with three dimensions.
        allowSurfaceBind: when true the descriptor flags are set to
            ``array3d_flags.SURFACE_LDST``; otherwise they are 0.

    Returns:
        The CUDA array holding the copied data.
    """
    depth, height, width = npArray.shape

    desc = cuda.ArrayDescriptor3D()
    desc.width = width
    desc.height = height
    desc.depth = depth
    desc.format = cuda.dtype_to_array_format(npArray.dtype)
    desc.num_channels = 1
    desc.flags = cuda.array3d_flags.SURFACE_LDST if allowSurfaceBind else 0
    result = cuda.Array(desc)

    # assumes strides[1] is the byte length of one row (C-contiguous input)
    # -- TODO confirm with callers
    row_bytes = npArray.strides[1]
    transfer = cuda.Memcpy3D()
    transfer.set_src_host(npArray)
    transfer.set_dst_array(result)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = height
    transfer.height = height
    transfer.depth = depth
    transfer()
    return result
示例11: _prepare_F_texture
def _prepare_F_texture(self):
    """Copy ``self.F_gpu`` into a cubic CUDA array and attach it to the texture.

    The CUDA array has edge length ``self.side`` in all three dimensions.
    The configured ``Memcpy3D`` is stored on ``self.F_gpu_to_array_copy``
    before being run (presumably so the copy can be repeated later --
    verify against other methods of the class), and the resulting array is
    set on ``self.F_texture``.
    """
    edge = self.side

    desc = drv.ArrayDescriptor3D()
    desc.width = edge
    desc.height = edge
    desc.depth = edge
    desc.format = drv.dtype_to_array_format(self.F_gpu.dtype)
    desc.num_channels = 1
    desc.flags = 0
    texture_array = drv.Array(desc)

    row_bytes = self.F_gpu.strides[1]
    transfer = drv.Memcpy3D()
    transfer.set_src_device(self.F_gpu.gpudata)
    transfer.set_dst_array(texture_array)
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = edge
    transfer.height = edge
    transfer.depth = edge

    self.F_gpu_to_array_copy = transfer
    self.F_gpu_to_array_copy()

    self.F_texture.set_array(texture_array)
示例12: numpy3d_to_array
def numpy3d_to_array(np_array, order=None):
    """Copy a 3D NumPy array into a newly created CUDA array.

    If you get a buffer error, run this function on ``np_array.copy('F')``.

    Parameters:
        np_array: host array with three dimensions.
        order: ``'C'`` (shape read as depth, height, width) or ``'F'``
            (shape read as width, height, depth), case-insensitive. When
            ``None`` it is ``'C'`` if ``strides[0] > strides[2]`` and
            ``'F'`` otherwise.

    Returns:
        The CUDA array holding the copied data.

    Raises:
        ValueError: if *order* is given and is neither ``'C'`` nor ``'F'``.
    """
    if order is None:
        order = 'C' if np_array.strides[0] > np_array.strides[2] else 'F'

    layout = order.upper()
    if layout == 'C':
        d, h, w = np_array.shape
    elif layout == 'F':
        w, h, d = np_array.shape
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("order must be either F or C")

    # Imported only after the arguments are validated, so a bad *order* is
    # reported without requiring a working CUDA installation.
    from pycuda.driver import Array, ArrayDescriptor3D, Memcpy3D, dtype_to_array_format

    descr = ArrayDescriptor3D()
    descr.width = w
    descr.height = h
    descr.depth = d
    descr.format = dtype_to_array_format(np_array.dtype)
    descr.num_channels = 1
    descr.flags = 0
    device_array = Array(descr)

    copy = Memcpy3D()
    copy.set_src_host(np_array)
    copy.set_dst_array(device_array)
    copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
    copy.src_height = copy.height = h
    copy.depth = d
    copy()
    return device_array
示例13: alloc_coeff_arrays
def alloc_coeff_arrays(s):
    """Build the host coefficient arrays on *s* and upload them to CUDA arrays.

    Creates ``s.cex``, ``s.cey`` and ``s.cez`` as float32 arrays of shape
    ``(s.nx, s.ny, s.nz)`` filled with 0.5, zeroes two boundary faces of
    each, then allocates ``s.tcex_gpu``, ``s.tcey_gpu`` and ``s.tcez_gpu``
    and copies the host arrays into them.
    """
    template = np.zeros((s.nx, s.ny, s.nz), 'f')

    for attr in ("cex", "cey", "cez"):
        setattr(s, attr, np.full_like(template, 0.5))

    # Each coefficient is zero on the last index of two of the three axes.
    s.cex[:, -1, :] = 0
    s.cex[:, :, -1] = 0
    s.cey[:, :, -1] = 0
    s.cey[-1, :, :] = 0
    s.cez[-1, :, :] = 0
    s.cez[:, -1, :] = 0

    desc = cuda.ArrayDescriptor3D()
    desc.width = s.nz
    desc.height = s.ny
    desc.depth = s.nx
    desc.format = cuda.dtype_to_array_format(template.dtype)
    desc.num_channels = 1
    desc.flags = 0

    for attr in ("tcex_gpu", "tcey_gpu", "tcez_gpu"):
        setattr(s, attr, cuda.Array(desc))

    row_bytes = template.strides[1]
    transfer = cuda.Memcpy3D()
    transfer.width_in_bytes = row_bytes
    transfer.src_pitch = row_bytes
    transfer.src_height = s.ny
    transfer.height = s.ny
    transfer.depth = s.nx

    # The same transfer descriptor is reused for all three coefficients.
    pairs = (
        (s.cex, s.tcex_gpu),
        (s.cey, s.tcey_gpu),
        (s.cez, s.tcez_gpu),
    )
    for host_coeff, device_array in pairs:
        transfer.set_src_host(host_coeff)
        transfer.set_dst_array(device_array)
        transfer()
示例14:
# Allocate device memory: six field buffers initialised from a zeroed
# Fortran-ordered float32 template, plus six 3D CUDA arrays that share one
# descriptor.
f = np.zeros((nx, ny, nz), 'f', order='F')

(ex_gpu, ey_gpu, ez_gpu,
 hx_gpu, hy_gpu, hz_gpu) = (cuda.to_device(f) for _ in range(6))

descr = cuda.ArrayDescriptor3D()
descr.width = nx
descr.height = ny
descr.depth = nz
descr.format = cuda.dtype_to_array_format(f.dtype)
descr.num_channels = 1
descr.flags = 0

(cex_gpu, cey_gpu, cez_gpu,
 chx_gpu, chy_gpu, chz_gpu) = (cuda.Array(descr) for _ in range(6))

# Transfer descriptor only; no source or destination is set and no copy is
# run in this fragment.
mcopy = cuda.Memcpy3D()
mcopy.width_in_bytes = f.strides[1]
mcopy.src_pitch = f.strides[1]
mcopy.src_height = ny
mcopy.height = ny
mcopy.depth = nz
示例15: get_kernel
#.........这里部分代码省略.........
self.write_interior_flux_code(True))
),
Line(),
Comment("work around nvcc assertion failure"),
S("fpair_nr+=1"),
S("fpair_nr-=1"),
Line(),
Comment("fluxes for single-sided (inter-block) interior face pairs"),
While("fpair_nr < data.header.diff_facepairs_end",
get_flux_code(lambda:
self.write_interior_flux_code(False))
),
Line(),
Comment("fluxes for single-sided boundary face pairs"),
While("fpair_nr < data.header.bdry_facepairs_end",
get_flux_code(
lambda: self.write_boundary_flux_code(for_benchmark))
),
])
f_body.extend_log_block("compute the fluxes", [
Initializer(POD(numpy.uint32, "fpair_nr"), "BLOCK_FACE"),
If("FACEDOF_NR < DOFS_PER_FACE", flux_computation)
])
if not fplan.direct_store:
f_body.extend([
Line(),
S("__syncthreads()"),
Line()
])
f_body.extend_log_block("store fluxes", [
#Assign("debugbuf[blockIdx.x]", "FOF_BLOCK_BASE"),
#Assign("debugbuf[0]", "FOF_BLOCK_BASE"),
#Assign("debugbuf[0]", "sizeof(face_pair)"),
For("unsigned word_nr = THREAD_NUM",
"word_nr < ALIGNED_FACE_DOFS_PER_MB*BLOCK_MB_COUNT",
"word_nr += COALESCING_THREAD_COUNT",
Block([Assign(
"gmem_fluxes_on_faces%d[FOF_BLOCK_BASE+word_nr]" % flux_nr,
"smem_fluxes_on_faces[%d][word_nr]" % flux_nr)
for flux_nr in range(len(self.fluxes))]
#+[If("isnan(smem_fluxes_on_faces[%d][word_nr])" % flux_nr,
#Block([
#Assign("debugbuf[blockIdx.x]", "word_nr"),
#])
#)
#for flux_nr in range(len(self.fluxes))]
)
)
])
if False:
f_body.extend([
Assign("debugbuf[blockIdx.x*96+32+BLOCK_FACE*32+threadIdx.x]", "fpair_nr"),
Assign("debugbuf[blockIdx.x*96+16]", "data.header.same_facepairs_end"),
Assign("debugbuf[blockIdx.x*96+17]", "data.header.diff_facepairs_end"),
Assign("debugbuf[blockIdx.x*96+18]", "data.header.bdry_facepairs_end"),
]
)
# finish off ----------------------------------------------------------
cmod.append(FunctionBody(f_decl, f_body))
if not for_benchmark and "cuda_dump_kernels" in discr.debug:
from hedge.tools import open_unique_debug_file
open_unique_debug_file("flux_gather", ".cu").write(str(cmod))
#from pycuda.tools import allow_user_edit
mod = SourceModule(
#allow_user_edit(cmod, "kernel.cu", "the flux kernel"),
cmod,
keep="cuda_keep_kernels" in discr.debug)
expr_to_texture_map = dict(
(dep_expr, mod.get_texref(
"field%d_tex" % self.dep_to_index[dep_expr]))
for dep_expr in self.all_deps)
index_list_texref = mod.get_texref("tex_index_lists")
index_list_texref.set_address(
ilist_data.device_memory,
ilist_data.bytes)
index_list_texref.set_format(
cuda.dtype_to_array_format(ilist_data.type), 1)
index_list_texref.set_flags(cuda.TRSF_READ_AS_INTEGER)
func = mod.get_function("apply_flux")
block = (fplan.threads_per_face(), fplan.parallel_faces, 1)
func.prepare(
(2+len(self.fluxes))*"P",
texrefs=expr_to_texture_map.values()
+ [index_list_texref])
if "cuda_flux" in discr.debug:
print "flux: lmem=%d smem=%d regs=%d" % (
func.local_size_bytes,
func.shared_size_bytes,
func.num_regs)
return block, func, expr_to_texture_map