本文整理汇总了Python中pycuda.tools.dtype_to_ctype函数的典型用法代码示例。如果您正苦于以下问题：Python dtype_to_ctype函数的具体用法？Python dtype_to_ctype怎么用？Python dtype_to_ctype使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dtype_to_ctype函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_eigsq_kernel
def _get_eigsq_kernel(dtype_s, dtype_q):
    """
    Compile and prepare the ``eigsq_Kernel`` CUDA kernel.

    For every index ``i < size`` the kernel overwrites ``d_q[i]`` with
    ``d_q[i] / d_S[i]`` when the absolute value of ``d_S[i]`` exceeds
    ``thres``, and with ``0.0`` otherwise.  Each thread starts at its
    global index and advances by the total number of launched threads.

    Parameters
    ----------
    dtype_s : numpy dtype
        Element type of ``d_S``; it is also the C type of ``thres`` in the
        kernel signature.
    dtype_q : numpy dtype
        Element type of ``d_q``.

    Returns
    -------
    func
        The kernel function, prepared for the arguments
        ``(d_S, d_q, thres, size)``.
    """
    template = """
#include <pycuda/pycuda-complex.hpp>
__global__ void
eigsq_Kernel(%(types)s* d_S, %(typeq)s* d_q, %(types)s thres, int size)
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
int total = blockDim.x * gridDim.x;
for(int i = tid; i < size; i += total)
{
%(types)s s = d_S[i];
%(typeq)s q = d_q[i];
if(fabs%(iff)s(s) > thres)
{
d_q[i] = q / s;
}else
{
d_q[i] = 0.0;
}
}
}
"""
    # The absolute value is taken of `s`, whose C type comes from dtype_s,
    # so the single-precision suffix must follow dtype_s.  Keying it on
    # dtype_q would apply fabsf() to a double `s` (narrowing it) whenever
    # dtype_q is float32 and dtype_s is double.
    abs_suffix = "f" if dtype_s == np.float32 else ""
    mod = SourceModule(template % {
        "types": dtype_to_ctype(dtype_s),
        "typeq": dtype_to_ctype(dtype_q),
        "iff": abs_suffix})
    func = mod.get_function("eigsq_Kernel")
    # Two pointers, the threshold (double only when dtype_s is double,
    # single precision otherwise) and the element count.
    thres_type = np.double if dtype_s == np.double else np.float32
    func.prepare([np.intp, np.intp, thres_type, np.int32])
    return func
示例2: get_by_index
def get_by_index(src_gpu, ind):
    """
    Gather values from a GPUArray at the given indices.

    Parameters
    ----------
    src_gpu : pycuda.gpuarray.GPUArray
        GPUArray instance from which to extract values.
    ind : pycuda.gpuarray.GPUArray or numpy.ndarray
        Array of element indices to read. Must have an integer dtype.

    Returns
    -------
    res_gpu : pycuda.gpuarray.GPUArray
        GPUArray with length of `ind` and dtype of `src_gpu` containing
        the selected values.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import misc
    >>> src = np.random.rand(5).astype(np.float32)
    >>> src_gpu = gpuarray.to_gpu(src)
    >>> ind = gpuarray.to_gpu(np.array([0, 2, 4]))
    >>> res_gpu = misc.get_by_index(src_gpu, ind)
    >>> np.allclose(res_gpu.get(), src[[0, 2, 4]])
    True

    Notes
    -----
    Only supports 1D index arrays.

    May not be efficient for certain index patterns because memory
    operations cannot always be coalesced.
    """
    # The index array must be one-dimensional and of integer type.
    assert len(np.shape(ind)) == 1
    assert issubclass(ind.dtype.type, numbers.Integral)

    count = len(ind)
    if not isinstance(ind, gpuarray.GPUArray):
        ind = gpuarray.to_gpu(ind)
    dest_gpu = gpuarray.empty(count, dtype=src_gpu.dtype)

    # An empty index array is answered directly: launching the kernel on
    # it would fail.
    if count == 0:
        return dest_gpu

    # One compiled kernel is kept per (data dtype, index dtype) pair in the
    # `cache` attribute of this function.
    cache_key = (src_gpu.dtype, ind.dtype)
    try:
        kernel = get_by_index.cache[cache_key]
    except KeyError:
        signature = "{data_ctype} *dest, {ind_ctype} *ind, {data_ctype} *src".format(
            data_ctype=tools.dtype_to_ctype(src_gpu.dtype),
            ind_ctype=tools.dtype_to_ctype(ind.dtype))
        kernel = elementwise.ElementwiseKernel(signature, "dest[i] = src[ind[i]]")
        get_by_index.cache[cache_key] = kernel

    kernel(dest_gpu, ind, src_gpu, range=slice(0, count, 1))
    return dest_gpu
示例3: __init__
def __init__(self, dtype_out,
        neutral, reduce_expr, map_expr=None, arguments=None,
        name="reduce_kernel", keep=False, options=None, preamble=""):
    """
    Build the two kernels of a two-stage reduction.

    Stage 1 is generated from ``map_expr`` and ``arguments``; stage 2 is
    generated without a map expression or argument list.  Both use a block
    size of 512 and the C type corresponding to ``dtype_out``.  The
    ``prepared_async_call`` entry points of both kernels and their
    argument type lists are stored on the instance.

    Raises ``AssertionError`` when none of the stage-1 argument types is
    ``"P"`` (i.e. there is no vector argument).
    """
    self.dtype_out = dtype_out
    self.block_size = 512

    out_ctype = dtype_to_ctype(dtype_out)
    # Build settings shared by both stages.
    build_kwargs = dict(keep=keep, options=options, preamble=preamble)

    stage1_kernel, self.stage1_arg_types = get_reduction_kernel_and_types(
        out_ctype, self.block_size,
        neutral, reduce_expr, map_expr,
        arguments, name=name+"_stage1", **build_kwargs)
    self.stage1_func = stage1_kernel.prepared_async_call

    # Stage 2 has only one input and no map expression.
    stage2_kernel, self.stage2_arg_types = get_reduction_kernel_and_types(
        out_ctype, self.block_size,
        neutral, reduce_expr,
        name=name+"_stage2", **build_kwargs)
    self.stage2_func = stage2_kernel.prepared_async_call

    has_vector_arg = any(arg_tp == "P" for arg_tp in self.stage1_arg_types)
    assert has_vector_arg, \
        "ReductionKernel can only be used with functions that have at least one " \
        "vector argument"
示例4: get_complex_function
def get_complex_function(real_type, imag_type, result_type, pitch=True):
    """
    Compile and prepare the ``makecomplex`` kernel.

    The kernel source is rendered from ``pitch_complex_template`` when
    ``pitch`` is true and from ``non_pitch_complex_template`` otherwise,
    substituting the C type names of the three dtypes.

    Returns the kernel function, prepared with the argument format
    ``'iiPiPPi'`` (int32, int32, pointer, int32, pointer, pointer, int32)
    for the pitch variant or ``'PPPi'`` (three pointers, int32) otherwise.
    """
    name = "makecomplex"
    substitutions = {
        "name": name,
        "real_type": dtype_to_ctype(real_type),
        "imag_type": dtype_to_ctype(imag_type),
        "result_type": dtype_to_ctype(result_type),
    }

    if pitch:
        template, arg_format = pitch_complex_template, 'iiPiPPi'
    else:
        template, arg_format = non_pitch_complex_template, 'PPPi'

    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)
    func.prepare(arg_format)
    return func
示例5: get_complex_from_amp_function
def get_complex_from_amp_function(in_type, result_type, pitch=True):
    """
    Compile and prepare the ``makecomplex_amp_phase`` kernel.

    The source comes from ``pitch_complex_amp_template`` when ``pitch`` is
    true and from ``non_pitch_complex_amp_template`` otherwise.  The
    ``fletter`` substitution is ``'f'`` when ``in_type`` is float32 and
    empty otherwise.

    Returns the kernel function, prepared with ``'iiPiPPi'`` (int32, int32,
    pointer, int32, pointer, pointer, int32) for the pitch variant or
    ``'PPPi'`` (three pointers, int32) otherwise.
    """
    name = "makecomplex_amp_phase"
    substitutions = {
        "name": name,
        "in_type": dtype_to_ctype(in_type),
        "result_type": dtype_to_ctype(result_type),
        "fletter": 'f' if in_type == np.float32 else '',
    }

    if pitch:
        template, arg_format = pitch_complex_amp_template, 'iiPiPPi'
    else:
        template, arg_format = non_pitch_complex_amp_template, 'PPPi'

    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)
    func.prepare(arg_format)
    return func
示例6: get_divarray_function
def get_divarray_function(left_dtype, right_dtype, rslt_dtype, pitch=True):
    """
    Compile and prepare the ``divarray`` kernel.

    The kernel is rendered from ``pitch_array_op_template`` (``pitch``
    true) or ``non_pitch_array_op_template`` with ``"/"`` as the operation
    and the C type names of the left, right and result dtypes.

    Returns the kernel function, prepared with ``'iiPiPiPi'`` (int32,
    int32, then three pointer/int32 pairs) for the pitch variant or
    ``'PPPi'`` (three pointers, int32) otherwise.
    """
    name = "divarray"
    # Operand types are converted in the same order as before:
    # left, right, result.
    left_ctype = dtype_to_ctype(left_dtype)
    right_ctype = dtype_to_ctype(right_dtype)
    result_ctype = dtype_to_ctype(rslt_dtype)
    substitutions = {
        "name": name,
        "dest_type": result_ctype,
        "left_type": left_ctype,
        "right_type": right_ctype,
        "operation": "/",
    }

    if pitch:
        template, arg_format = pitch_array_op_template, 'iiPiPiPi'
    else:
        template, arg_format = non_pitch_array_op_template, 'PPPi'

    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)
    func.prepare(arg_format)
    return func
示例7: get_scalardiv_function
def get_scalardiv_function(src_type, dest_type, pitch=True):
    """
    Compile and prepare the ``scalardiv`` kernel.

    The kernel is rendered from ``pitch_right_scalar_op_template``
    (``pitch`` true) or ``non_pitch_right_scalar_op_template`` with ``"/"``
    as the operation.

    Returns the kernel function.  The prepared argument format embeds the
    numpy type character of ``dest_type`` for the scalar operand:
    ``'iiPiPi' + char`` for the pitch variant, ``'PP' + char + 'i'``
    otherwise.
    """
    name = "scalardiv"
    substitutions = {
        "name": name,
        "src_type": dtype_to_ctype(src_type),
        "dest_type": dtype_to_ctype(dest_type),
        "operation": "/",
    }

    template = (pitch_right_scalar_op_template if pitch
                else non_pitch_right_scalar_op_template)
    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)

    # The scalar is passed with the destination dtype's type character.
    scalar_char = np.dtype(dest_type).char
    if pitch:
        func.prepare('iiPiPi' + scalar_char)
    else:
        func.prepare('PP' + scalar_char + 'i')
    return func
示例8: get_powscalar_function
def get_powscalar_function(src_type, dest_type, pitch=True):
    """
    Compile and prepare the ``powscalar`` kernel.

    The kernel is rendered from ``pitch_left_scalar_func_template``
    (``pitch`` true) or ``non_pitch_left_scalar_func_template`` with
    ``"pow"`` as the operation; ``fletter`` is ``'f'`` when ``src_type`` is
    float32 and empty otherwise.

    Returns the kernel function.  The prepared argument format embeds the
    numpy type character of ``dest_type`` for the scalar operand:
    ``'iiPiPi' + char`` for the pitch variant, ``'PP' + char + 'i'``
    otherwise.
    """
    name = "powscalar"
    substitutions = {
        "name": name,
        "src_type": dtype_to_ctype(src_type),
        "dest_type": dtype_to_ctype(dest_type),
        "operation": "pow",
        "fletter": 'f' if src_type == np.float32 else '',
    }

    template = (pitch_left_scalar_func_template if pitch
                else non_pitch_left_scalar_func_template)
    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)

    # The scalar is passed with the destination dtype's type character.
    scalar_char = np.dtype(dest_type).char
    if pitch:
        func.prepare('iiPiPi' + scalar_char)
    else:
        func.prepare('PP' + scalar_char + 'i')
    return func
示例9: get_astype_function
def get_astype_function(dtype_dest, dtype_src, pitch=True):
    """
    Compile and prepare the ``astype`` kernel.

    The kernel is rendered from ``pitch_template`` (``pitch`` true) or
    ``non_pitch_template`` with an empty operation string and the C type
    names of the destination and source dtypes.

    Returns the kernel function, prepared with ``'iiPiPi'`` (int32, int32,
    then two pointer/int32 pairs) for the pitch variant or ``'PPi'`` (two
    pointers, int32) otherwise.
    """
    name = "astype"
    substitutions = {
        "name": name,
        "dest_type": dtype_to_ctype(dtype_dest),
        "src_type": dtype_to_ctype(dtype_src),
        "operation": "",
    }

    if pitch:
        template, arg_format = pitch_template, 'iiPiPi'
    else:
        template, arg_format = non_pitch_template, 'PPi'

    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)
    func.prepare(arg_format)
    return func
示例10: get_angle_function
def get_angle_function(dtypein, dtypeout, pitch=True):
    """
    Compile and prepare the ``angle_function`` kernel.

    The kernel is rendered from ``pitch_angle_template`` (``pitch`` true)
    or ``non_pitch_angle_template``; ``fletter`` is ``"f"`` when
    ``dtypeout`` is float32 and empty otherwise.

    Returns the kernel function, prepared with ``'iiPiPi'`` (int32, int32,
    then two pointer/int32 pairs) for the pitch variant or ``'PPi'`` (two
    pointers, int32) otherwise.
    """
    name = "angle_function"
    # The source type is converted first, then the destination type.
    src_ctype = dtype_to_ctype(dtypein)
    dest_ctype = dtype_to_ctype(dtypeout)
    substitutions = {
        "name": name,
        "dest_type": dest_ctype,
        "src_type": src_ctype,
        "fletter": "f" if dtypeout == np.float32 else "",
    }

    if pitch:
        template, arg_format = pitch_angle_template, 'iiPiPi'
    else:
        template, arg_format = non_pitch_angle_template, 'PPi'

    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)
    func.prepare(arg_format)
    return func
示例11: get_dot_kernel
def get_dot_kernel(dtype_out, dtype_a, dtype_b):
    """
    Return a ``ReductionKernel`` that sums the products ``a[i]*b[i]``.

    The kernel takes two const vectors ``a`` and ``b`` whose C element
    types are derived from ``dtype_a`` and ``dtype_b``; the result has
    dtype ``dtype_out`` and the neutral element is ``"0"``.
    """
    type_names = {
        "tp_a": dtype_to_ctype(dtype_a),
        "tp_b": dtype_to_ctype(dtype_b),
    }
    signature = "const %(tp_a)s *a, const %(tp_b)s *b" % type_names
    # NOTE(review): keep=True is passed unconditionally -- presumably a
    # debugging aid; confirm it is intended.
    return ReductionKernel(
        dtype_out,
        neutral="0",
        reduce_expr="a+b",
        map_expr="a[i]*b[i]",
        arguments=signature,
        keep=True)
示例12: get_take_kernel
def get_take_kernel(dtype, idx_dtype, vec_count=1):
    """
    Build the ``take`` elementwise kernel that fetches through textures.

    For each of the ``vec_count`` destination vectors ``dest<k>`` the
    kernel executes ``dest<k>[i] = fp_tex1Dfetch(tex_src<k>, idx[i])``.

    Returns
    -------
    (func, tex_src)
        The prepared kernel function and the list of its ``vec_count``
        texture references (``tex_src0``, ``tex_src1``, ...).
    """
    ctx = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        # "tp" is not referenced by the templates below; the lookup is
        # kept so the same conversions are performed.
        "tp": dtype_to_ctype(dtype),
        "tex_tp": dtype_to_ctype(dtype, with_fp_tex_hack=True),
    }

    # Arguments: the index vector, one destination per source, the length.
    args = [VectorArg(idx_dtype, "idx")]
    for i in range(vec_count):
        args.append(VectorArg(dtype, "dest"+str(i)))
    args.append(ScalarArg(np.intp, "n"))

    texture_decls = [
        "texture <%s, 1, cudaReadModeElementType> tex_src%d;" % (ctx["tex_tp"], i)
        for i in range(vec_count)]
    preamble = "#include <pycuda-helpers.hpp>\n\n" + "\n".join(texture_decls)

    fetch_lines = [
        "dest%d[i] = fp_tex1Dfetch(tex_src%d, src_idx);" % (i, i)
        for i in range(vec_count)]
    body = ("%(idx_tp)s src_idx = idx[i];\n" % ctx) + "\n".join(fetch_lines)

    mod = get_elwise_module(args, body, "take", preamble=preamble)
    func = mod.get_function("take")
    tex_src = [mod.get_texref("tex_src%d" % i) for i in range(vec_count)]

    # NOTE(review): "n" is declared with np.intp above but prepared with
    # the np.uintp type character here -- confirm this is intended.
    arg_format = "P" + "P" * vec_count + np.dtype(np.uintp).char
    func.prepare(arg_format, texrefs=tex_src)
    return func, tex_src
示例13: get_divarray_function
def get_divarray_function(left_dtype, right_dtype, rslt_dtype, pitch=True):
    """
    Compile (via ``func_compile``) and prepare the ``divarray`` kernel.

    The kernel is rendered from ``pitch_array_op_template`` (``pitch``
    true) or ``non_pitch_array_op_template`` with ``"/"`` as the operation
    and the C type names of the left, right and result dtypes.

    Returns the kernel function, prepared with a list of numpy argument
    types: two int32 followed by three (pointer, int32) pairs for the pitch
    variant, or three pointers and an int32 otherwise.
    """
    name = "divarray"
    # Operand types are converted in the same order as before:
    # left, right, result.
    left_ctype = dtype_to_ctype(left_dtype)
    right_ctype = dtype_to_ctype(right_dtype)
    result_ctype = dtype_to_ctype(rslt_dtype)
    substitutions = {
        "name": name,
        "dest_type": result_ctype,
        "left_type": left_ctype,
        "right_type": right_ctype,
        "operation": "/",
    }

    template = (pitch_array_op_template if pitch
                else non_pitch_array_op_template)
    func = func_compile(name, template % substitutions)

    if pitch:
        arg_types = [np.int32, np.int32,
                     np.intp, np.int32,
                     np.intp, np.int32,
                     np.intp, np.int32]
    else:
        arg_types = [np.intp, np.intp, np.intp, np.int32]
    func.prepare(arg_types)
    return func
示例14: get_divscalar_function
def get_divscalar_function(src_type, dest_type, pitch=True):
    """
    Compile and prepare the ``divscalar`` kernel.

    The kernel is rendered from ``pitch_left_scalar_op_template``
    (``pitch`` true) or ``non_pitch_left_scalar_op_template`` with ``"/"``
    as the operation.

    Returns the kernel function, prepared with a list of numpy argument
    types in which the scalar operand's type is ``_get_type(dest_type)``:
    it is the last argument for the pitch variant and the third of four
    otherwise.
    """
    name = "divscalar"
    substitutions = {
        "name": name,
        "src_type": dtype_to_ctype(src_type),
        "dest_type": dtype_to_ctype(dest_type),
        "operation": "/",
    }

    template = (pitch_left_scalar_op_template if pitch
                else non_pitch_left_scalar_op_template)
    module = SourceModule(template % substitutions,
                          options=["--ptxas-options=-v"])
    func = module.get_function(name)

    scalar_type = _get_type(dest_type)
    if pitch:
        arg_types = [np.int32, np.int32, np.intp, np.int32,
                     np.intp, np.int32, scalar_type]
    else:
        arg_types = [np.intp, np.intp, scalar_type, np.int32]
    func.prepare(arg_types)
    return func
示例15: get_accum_diff_sq_kernel
def get_accum_diff_sq_kernel(dtype_x, dtype_z):
    """
    Return the ``chisq_accum`` elementwise kernel.

    The kernel takes vectors ``x`` and ``z`` (C element types derived from
    ``dtype_x`` and ``dtype_z``) and performs ``x[i] += norm(z[i])`` for
    each element.
    """
    type_names = {
        "tp_a": dtype_to_ctype(dtype_x),
        "tp_c": dtype_to_ctype(dtype_z),
    }
    signature = "%(tp_a)s *x, %(tp_c)s *z" % type_names
    operation = "x[i] += norm(z[i]) "
    return ElementwiseKernel(signature, operation, "chisq_accum")