本文整理汇总了Python中pyopencl.characterize.has_double_support函数的典型用法代码示例。如果您正苦于以下问题：Python has_double_support函数的具体用法？Python has_double_support怎么用？Python has_double_support使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了has_double_support函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_lin_comb_diff
def test_lin_comb_diff(ctx_factory, arg_type):
    """Check the ``lin_comb_diff`` kernel against ``c + 2 * a + 3 * b``.

    The kernel is run on random inputs of dtype ``arg_type`` and its output is
    compared (2-norm of the difference at most ``2e-4``) with the reference
    expression evaluated once with PyOpenCL arrays and once with NumPy.  The
    test is skipped when ``arg_type`` is a double-precision type and the
    device reports no double support.
    """
    ctx = ctx_factory()
    # Unpacking into a one-element list requires exactly one device.
    [dev] = ctx.devices

    if not has_double_support(dev) and arg_type in (np.float64, np.complex128):
        pytest.skip('Device does not support double.')

    size = 100000
    # Host inputs are drawn in the order a, b, c; only c is scaled by 10.
    host_a = np.random.randn(size).astype(arg_type)
    host_b = np.random.randn(size).astype(arg_type)
    host_c = (np.random.randn(size) * 10).astype(arg_type)

    queue = cl.CommandQueue(ctx)
    dev_a, dev_b, dev_c = (
        cl.array.to_device(queue, host) for host in (host_a, host_b, host_c))
    dev_res = cl.array.empty_like(dev_a)

    kernel = lin_comb_diff_kernel(
        ctx, arg_type, arg_type, arg_type, np.float32, 2)
    global_size, local_size = get_group_sizes(size, dev, kernel)
    event = run_elwise_kernel(
        kernel, queue, global_size, local_size, size, [],
        dev_res, dev_c, dev_a, dev_b, 2, 3)
    event.wait()

    # Reference evaluated on the device with PyOpenCL array arithmetic.
    device_expected = dev_c + 2 * dev_a + 3 * dev_b
    assert np.linalg.norm((dev_res - device_expected).get()) <= 2e-4

    # Reference evaluated on the host with NumPy.
    host_res = dev_res.get()
    host_expected = host_c + 2 * host_a + 3 * host_b
    assert np.linalg.norm(host_res - host_expected) <= 2e-4
示例2: test
def test(ctx_factory):
    """Compare a ``clmath`` function with its NumPy counterpart.

    ``name``, ``numpy_func_names``, ``use_complex``, ``threshold``, ``sizes``,
    ``a`` and ``b`` are not defined in this function; they are read from the
    surrounding scope.  For every size and dtype the maximum absolute
    difference between device and host results must not exceed the threshold.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    gpu_func = getattr(clmath, name)
    cpu_func = getattr(np, numpy_func_names.get(name, name))

    # Build the dtype list in the order: real single, real double,
    # complex single, complex double -- leaving out what is not applicable.
    double_ok = has_double_support(context.devices[0])
    dtypes = [np.float32]
    if double_ok:
        dtypes.append(np.float64)
    if use_complex:
        dtypes.append(np.complex64)
        if double_ok:
            dtypes.append(np.complex128)

    for s in sizes:
        for raw_dtype in dtypes:
            dtype = np.dtype(raw_dtype)
            is_complex = dtype.kind == "c"

            args = cl_array.arange(queue, a, b, (b - a) / s, dtype=dtype)
            if is_complex:
                # Give the arguments a non-zero imaginary part.
                args = args + dtype.type(1j) * args

            gpu_results = gpu_func(args).get()
            cpu_results = cpu_func(args.get())

            # A float-valued ``use_complex`` replaces the threshold for
            # complex dtypes.
            if is_complex and isinstance(use_complex, float):
                my_threshold = use_complex
            else:
                my_threshold = threshold

            max_err = np.max(np.abs(cpu_results - gpu_results))
            assert (max_err <= my_threshold).all(), (max_err, name, dtype)
示例3: get_write_kernel
def get_write_kernel(self, index_dtype):
    """Render, build and return the ``<name_prefix>_write`` kernel.

    :arg index_dtype: dtype used for the start-index vector arguments and
        for the trailing scalar kernel argument.
    :returns: the built kernel, with its scalar argument dtypes already set.
    """
    from pyopencl.characterize import has_double_support
    from pyopencl.tools import (
        OtherArg, VectorArg, get_arg_list_scalar_arg_dtypes)

    index_ctype = dtype_to_ctype(index_dtype)

    kernel_list_args = []
    user_list_args = []
    arg_value_parts = []

    for name, dtype in self.list_names_and_dtypes:
        list_name = "plb_%s_list" % name
        list_arg = VectorArg(dtype, list_name)
        kernel_list_args.append(list_arg)
        user_list_args.append(list_arg)

        if name in self.count_sharing:
            # Names in count_sharing get neither a start-index argument nor
            # an index pointer of their own.
            arg_value_parts.append("%s, " % list_name)
        else:
            kernel_list_args.append(
                VectorArg(index_dtype, "plb_%s_start_index" % name))

            index_name = "plb_%s_index" % name
            user_list_args.append(
                OtherArg("%s *%s" % (index_ctype, index_name), index_name))
            arg_value_parts.append("%s, &%s, " % (list_name, index_name))

    kernel_list_arg_values = "".join(arg_value_parts)
    kernel_name = self.name_prefix + "_write"

    # Double support is only reported if every device in the context has it.
    double_support = all(
        has_double_support(dev) for dev in self.context.devices)

    rendered = _LIST_BUILDER_TEMPLATE.render(
        is_count_stage=False,
        kernel_name=kernel_name,
        double_support=double_support,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        kernel_list_arg_values=kernel_list_arg_values,
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl=_get_arg_decl(self.arg_decls),
        user_args=_get_arg_list(self.arg_decls),
        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,
        index_type=index_ctype,
    )

    program = cl.Program(self.context, str(rendered)).build(self.options)
    knl = getattr(program, kernel_name)

    scalar_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args + self.arg_decls)
    knl.set_scalar_arg_dtypes(scalar_dtypes + [index_dtype])
    return knl
示例4: test_astype
def test_astype(ctx_factory):
    """Check that device-side ``astype`` agrees with NumPy's ``astype``.

    Covers float32 -> float64 (results must match exactly) and
    float64 -> float32 (relative difference below ``1e-7``).  Skipped when the
    first device of the context has no double support.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    device = context.devices[0]
    if not has_double_support(device):
        from pytest import skip
        skip("double precision not supported on %s" % device)

    # float32 -> float64: converting on the host and on the device must give
    # identical values.
    single_dev = clrand(queue, (2000,), dtype=np.float32)
    via_host = single_dev.get().astype(np.float64)
    via_device = single_dev.astype(np.float64).get()
    assert via_device.dtype == np.float64
    assert la.norm(via_host - via_device) == 0, (via_host, via_device)

    # float64 -> float32: only a small relative difference is tolerated.
    double_dev = clrand(queue, (2000,), dtype=np.float64)
    via_host = double_dev.get().astype(np.float32)
    via_device = double_dev.astype(np.float32).get()
    assert via_device.dtype == np.float32
    assert la.norm(via_host - via_device) / la.norm(via_host) < 1e-7
示例5: _get_reduction_source
def _get_reduction_source(
        ctx, out_type, out_type_size,
        neutral, reduce_expr, map_expr, parsed_args,
        name="reduce_kernel", preamble="", arg_prep="",
        device=None, max_group_size=None):
    """Render the reduction kernel source and choose a work-group size.

    :arg ctx: context whose devices are considered when *device* is ``None``;
        also stored on the returned record.
    :arg out_type: passed to the template as ``out_type``.
    :arg out_type_size: divisor applied to a device's maximum work-group size
        when limiting the group size.
    :arg neutral: passed to the template as ``neutral``.
    :arg reduce_expr: reduction expression; run through
        ``_process_code_for_macro`` before rendering.
    :arg map_expr: map expression; run through ``_process_code_for_macro``
        before rendering.
    :arg parsed_args: iterable of objects providing ``declarator()``; the
        results are joined with ``", "`` to form the argument list.
    :arg name: passed to the template as ``name``.
    :arg preamble: passed to the template as ``preamble``.
    :arg arg_prep: passed to the template as ``arg_prep``.
    :arg device: if given, only this device is considered.
    :arg max_group_size: optional upper bound on the chosen group size.
    :returns: a record with attributes ``context``, ``source`` and
        ``group_size``.
    """
    if device is not None:
        devices = [device]
    else:
        devices = ctx.devices

    # {{{ compute group size

    def get_dev_group_size(dev):
        # dirty fix for the RV770 boards
        max_work_group_size = dev.max_work_group_size
        if "RV770" in dev.name:
            max_work_group_size = 64

        # compute lmem limit
        from pytools import div_ceil
        lmem_wg_size = div_ceil(max_work_group_size, out_type_size)
        result = min(max_work_group_size, lmem_wg_size)

        # round down to power of 2
        from pyopencl.tools import bitlog2
        return 2**bitlog2(result)

    # The smallest per-device size is used so that it suits every device.
    group_size = min(get_dev_group_size(dev) for dev in devices)

    if max_group_size is not None:
        group_size = min(max_group_size, group_size)

    # }}}

    from mako.template import Template
    from pyopencl.characterize import has_double_support

    # The builtin all() is used here; the previous ``from pytools import all``
    # shadowed it with a legacy compatibility shim.
    src = str(Template(KERNEL).render(
        out_type=out_type,
        arguments=", ".join(arg.declarator() for arg in parsed_args),
        group_size=group_size,
        neutral=neutral,
        reduce_expr=_process_code_for_macro(reduce_expr),
        map_expr=_process_code_for_macro(map_expr),
        name=name,
        preamble=preamble,
        arg_prep=arg_prep,
        double_support=all(has_double_support(dev) for dev in devices),
        ))

    from pytools import Record

    class ReductionInfo(Record):
        pass

    return ReductionInfo(
        context=ctx,
        source=src,
        group_size=group_size)
示例6: test_dot
def test_dot(ctx_factory):
    """Check ``cl_array.dot`` and ``cl_array.vdot`` against NumPy.

    Every ordered pair of the available dtypes is tested; float64 and
    complex128 are included only when the first device supports doubles.
    Requires ``mako``.
    """
    from itertools import product

    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtypes = [np.float32, np.complex64]
    if has_double_support(context.devices[0]):
        dtypes += [np.float64, np.complex128]

    rel_tol = 1e-4
    # product(..., repeat=2) visits the pairs in nested-loop order.
    for a_dtype, b_dtype in product(dtypes, repeat=2):
        print(a_dtype, b_dtype)

        a_gpu = general_clrand(queue, (200000,), a_dtype)
        a = a_gpu.get()
        b_gpu = general_clrand(queue, (200000,), b_dtype)
        b = b_gpu.get()

        expected_dot = np.dot(a, b)
        actual_dot = cl_array.dot(a_gpu, b_gpu).get()
        assert abs(actual_dot - expected_dot) / abs(expected_dot) < rel_tol

        expected_vdot = np.vdot(a, b)
        actual_vdot = cl_array.vdot(a_gpu, b_gpu).get()
        assert abs(actual_vdot - expected_vdot) / abs(expected_vdot) < rel_tol
示例7: __init__
def __init__(self, queue, num_work_items,
        luxury=None, seed=None, no_warmup=False,
        use_legacy_init=False, max_work_items=None):
    """Set up the generator state and run the ``init_ranlux`` kernel.

    :arg queue: command queue; its context and device are used to build
        and launch the initialization kernel.
    :arg num_work_items: number of work items; sizes the state array and
        the kernel launch.
    :arg luxury: stored as ``self.luxury``; ``None`` selects 4.
    :arg seed: value passed to the kernel as a 32-bit unsigned integer.
        ``None`` derives a seed from the current time.
    :arg no_warmup: stored as ``self.no_warmup``.
    :arg use_legacy_init: stored as ``self.use_legacy_init``.
    :arg max_work_items: stored as ``self.max_work_items``.
    """
    if luxury is None:
        luxury = 4

    if seed is None:
        from time import time
        # ``%`` binds tighter than ``<<``: without the parentheses this is
        # ``(x % 2) << 30``, which can only ever be 0 or 2**30.
        seed = int(time()*1e6) % (2 << 30)

    self.context = queue.context
    self.luxury = luxury
    self.num_work_items = num_work_items

    from pyopencl.characterize import has_double_support
    self.support_double = has_double_support(queue.device)

    self.no_warmup = no_warmup
    self.use_legacy_init = use_legacy_init
    self.max_work_items = max_work_items

    # All settings attributes are assigned before generate_settings_defines()
    # is called below.
    src = """
%(defines)s
#include <pyopencl-ranluxcl.cl>
kernel void init_ranlux(unsigned seeds, global ranluxcl_state_t *ranluxcltab)
{
if (get_global_id(0) < %(num_work_items)d)
ranluxcl_initialization(seeds, ranluxcltab);
}
""" % {
        "defines": self.generate_settings_defines(),
        "num_work_items": num_work_items
    }

    prg = cl.Program(queue.context, src).build()

    # {{{ compute work group size

    wg_size = None

    import sys
    import platform
    # A work-group size of 1 is forced for Apple CPU devices on
    # Mac OS X 10.7; everywhere else it is left as None.
    if ("darwin" in sys.platform
            and "Apple" in queue.device.platform.vendor
            and platform.mac_ver()[0].startswith("10.7")
            and queue.device.type == cl.device_type.CPU):
        wg_size = (1,)

    self.wg_size = wg_size

    # }}}

    # 112 bytes of state per work item, filled with 17 before the kernel
    # initializes it.
    self.state = cl_array.empty(queue, (num_work_items, 112), dtype=np.uint8)
    self.state.fill(17)

    prg.init_ranlux(queue, (num_work_items,), self.wg_size, np.uint32(seed),
            self.state.data)
示例8: get_count_kernel
def get_count_kernel(self, index_dtype):
    """Render, build and return the ``<name_prefix>_count`` kernel.

    :arg index_dtype: dtype used for the per-list count vector arguments
        and for the trailing scalar kernel argument.
    :returns: the built kernel, with its scalar argument dtypes already set.
    """
    from pyopencl.characterize import has_double_support
    from pyopencl.tools import (
        OtherArg, VectorArg, get_arg_list_scalar_arg_dtypes)

    index_ctype = dtype_to_ctype(index_dtype)

    # Names listed in count_sharing get neither a count vector nor a local
    # counter.
    counted_names = [
        name for name, _ in self.list_names_and_dtypes
        if name not in self.count_sharing]

    kernel_list_args = [
        VectorArg(index_dtype, "plb_%s_count" % name)
        for name in counted_names]

    user_list_args = []
    for name in counted_names:
        local_count_name = "plb_loc_%s_count" % name
        user_list_args.append(OtherArg(
            "%s *%s" % (index_ctype, local_count_name), local_count_name))

    kernel_name = self.name_prefix + "_count"

    # Double support is only reported if every device in the context has it.
    double_support = all(
        has_double_support(dev) for dev in self.context.devices)

    rendered = _LIST_BUILDER_TEMPLATE.render(
        is_count_stage=True,
        kernel_name=kernel_name,
        double_support=double_support,
        debug=self.debug,
        do_not_vectorize=self.do_not_vectorize(),
        eliminate_empty_output_lists=self.eliminate_empty_output_lists,
        kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
        kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"),
        user_list_arg_decl=_get_arg_decl(user_list_args),
        user_list_args=_get_arg_list(user_list_args),
        user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
        user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
        user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
        arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),
        list_names_and_dtypes=self.list_names_and_dtypes,
        count_sharing=self.count_sharing,
        name_prefix=self.name_prefix,
        generate_template=self.generate_template,
        preamble=self.preamble,
        index_type=index_ctype,
    )

    program = cl.Program(self.context, str(rendered)).build(self.options)
    knl = getattr(program, kernel_name)

    scalar_dtypes = get_arg_list_scalar_arg_dtypes(
        kernel_list_args + self.arg_decls)
    knl.set_scalar_arg_dtypes(scalar_dtypes + [index_dtype])
    return knl
示例9: test_get_kernels
def test_get_kernels(ctx_factory, res_type, arg_type, weight_type):
    """Create ``lin_comb_kernel`` for lengths 1 and 2 with the given types.

    Skipped when any of the three types is double precision and the device
    reports no double support.
    """
    ctx = ctx_factory()
    # Unpacking into a one-element list requires exactly one device.
    [dev] = ctx.devices

    if not has_double_support(dev) and any(
            t in (np.float64, np.complex128)
            for t in (res_type, arg_type, weight_type)):
        pytest.skip('Device does not support double.')

    for length in (1, 2):
        lin_comb_kernel(ctx, res_type, arg_type, weight_type, length)
示例10: test_clrandom_dtypes
def test_clrandom_dtypes(ctx_factory, rng_class, dtype):
    """Call ``uniform`` (and, for non-integer dtypes, ``normal``) on an RNG.

    Only checks that the calls succeed for ``dtype``.  Skipped for float64
    when the first device of the context has no double support.
    """
    cl_ctx = ctx_factory()

    if dtype == np.float64:
        if not has_double_support(cl_ctx.devices[0]):
            pytest.skip("double precision not supported on this device")

    rng = rng_class(cl_ctx)
    is_integer_dtype = dtype in (np.int32, np.int64)

    with cl.CommandQueue(cl_ctx) as queue:
        rng.uniform(queue, 10, dtype)
        # normal() is not exercised for the integer dtypes.
        if not is_integer_dtype:
            rng.normal(queue, 10, dtype)
示例11: test_hankel_01_complex
def test_hankel_01_complex(ctx_factory, ref_src):
    """Check ``clmath.hankel_01`` against a reference implementation.

    :arg ref_src: ``"pyfmmlib"`` or ``"scipy"``; selects the library that
        provides the reference values.  Any other value raises
        :class:`ValueError`.

    The maximum relative error must stay below ``4e-13`` for the first result
    and ``2e-13`` for the second.  Skipped without double support.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if not has_double_support(ctx.devices[0]):
        from pytest import skip
        skip("no double precision support--cannot test complex bessel function")

    n = 10**6
    np.random.seed(11)

    # Moduli log-spaced from 1e-5 to 1e2, each with a random phase.
    moduli = np.logspace(-5, 2, n)
    unit_phases = np.exp(1j * 2 * np.pi * np.random.rand(n))
    z = moduli * unit_phases

    # Not used by the checks below; retained from the original test.
    def get_err(check, ref):
        return np.max(np.abs(check-ref)) / np.max(np.abs(ref))

    if ref_src not in ("pyfmmlib", "scipy"):
        raise ValueError("ref_src")

    if ref_src == "pyfmmlib":
        pyfmmlib = pytest.importorskip("pyfmmlib")
        h0_ref, h1_ref = pyfmmlib.hank103_vec(z, ifexpon=1)
    else:
        spec = pytest.importorskip("scipy.special")
        h0_ref = spec.hankel1(0, z)
        h1_ref = spec.hankel1(1, z)

    z_dev = cl_array.to_device(queue, z)
    h0_dev, h1_dev = clmath.hankel_01(z_dev)

    # Pointwise relative errors.
    rel_err_h0 = np.abs(h0_dev.get() - h0_ref) / np.abs(h0_ref)
    rel_err_h1 = np.abs(h1_dev.get() - h1_ref) / np.abs(h1_ref)

    max_rel_err_h0 = np.max(rel_err_h0)
    max_rel_err_h1 = np.max(rel_err_h1)

    print("H0", max_rel_err_h0)
    print("H1", max_rel_err_h1)

    assert max_rel_err_h0 < 4e-13
    assert max_rel_err_h1 < 2e-13

    # Disabled debugging aid: plot the relative error against |z|.
    if 0:
        import matplotlib.pyplot as pt
        z_abs = np.abs(z)
        pt.loglog(z_abs, rel_err_h0)
        pt.loglog(z_abs, rel_err_h1)
        pt.show()
示例12: get_dot_kernel
def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None,
        conjugate_first=False):
    """Return a :class:`ReductionKernel` that sums a map expression over
    two input vectors ``a`` and ``b``.

    The map expression and the final ``dtype_out`` / ``dtype_b`` come from
    ``_get_dot_expr``; double support is taken from the first device of *ctx*.

    NOTE(review): ``dtype_a`` is handed to ``dtype_to_ctype`` unchanged, so
    the ``None`` default presumably must be overridden by callers -- confirm.
    """
    from pyopencl.characterize import has_double_support

    double_ok = has_double_support(ctx.devices[0])
    map_expr, dtype_out, dtype_b = _get_dot_expr(
        dtype_out, dtype_a, dtype_b, conjugate_first,
        has_double_support=double_ok)

    ctypes = {
        "tp_a": dtype_to_ctype(dtype_a),
        "tp_b": dtype_to_ctype(dtype_b),
    }
    arguments = "const %(tp_a)s *a, const %(tp_b)s *b" % ctypes

    return ReductionKernel(
        ctx, dtype_out, neutral="0",
        reduce_expr="a+b", map_expr=map_expr,
        arguments=arguments)
示例13: test_random
def test_random(ctx_getter):
    """Check that ``clrand`` samples lie in the half-open interval [0, 1).

    float64 is tested in addition to float32 when the first device supports
    doubles.
    """
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    dtypes = [np.float32]
    if has_double_support(context.devices[0]):
        dtypes.append(np.float64)

    for dtype in dtypes:
        # NOTE(review): this passes the context as the first argument; verify
        # that this matches the ``rand`` signature of the installed PyOpenCL.
        samples = clrand(context, queue, (10, 100), dtype=dtype).get()
        assert (0 <= samples).all()
        assert (samples < 1).all()
示例14: test_random_float_in_range
def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False):
    """Check the value ranges produced by ``fill_uniform``.

    For each dtype, ``fill_uniform`` must produce values in ``[0, 1]`` by
    default and in ``[4, 7]`` with ``a=4, b=7``.  ``normal`` is also called,
    but its values are not checked.

    :arg plot_hist: if true, show histograms of the default-uniform and the
        normal samples with matplotlib.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtypes = [np.float32]
    if has_double_support(context.devices[0]):
        dtypes.append(np.float64)

    # RanluxGenerator is built from a queue and a work-item count; any other
    # generator class is built from the context.
    is_ranlux = rng_class is RanluxGenerator
    if is_ranlux:
        gen = rng_class(queue, 5120)
    else:
        gen = rng_class(context)

    for dtype in dtypes:
        print(dtype)

        # Default range.
        ran = cl_array.zeros(queue, ary_size, dtype)
        gen.fill_uniform(ran)

        if plot_hist:
            import matplotlib.pyplot as pt
            pt.hist(ran.get(), 30)
            pt.show()

        assert (0 <= ran.get()).all()
        assert (ran.get() <= 1).all()

        if is_ranlux:
            gen.synchronize(queue)

        # Explicit range.
        ran = cl_array.zeros(queue, ary_size, dtype)
        gen.fill_uniform(ran, a=4, b=7)
        ran_host = ran.get()

        for in_bounds in (4 <= ran_host, ran_host <= 7):
            good = in_bounds.all()
            if not good:
                # Show where the bound is violated, and by which values.
                offending = ~in_bounds
                print(np.where(offending))
                print(ran_host[offending])
            assert good

        ran = gen.normal(queue, ary_size, dtype, mu=10, sigma=3)

        if plot_hist:
            import matplotlib.pyplot as pt
            pt.hist(ran.get(), 30)
            pt.show()
示例15: test_bitonic_argsort
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` when an index array is sorted along.

    A random array of ``size`` elements of ``dtype`` is sorted together with
    an index array ``0..size-1``.  The sorted values must equal
    ``np.sort`` of the input, and gathering the input through the returned
    indices must give the same values as gathering through ``np.argsort``.

    Expected to fail (xfail) for empty arrays on PyPy, on Apple POCL, on
    Apple CPU devices and for float64 on POCL older than 1.0; skipped for
    float64 on devices without double support.
    """
    import sys
    is_pypy = '__pypy__' in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                "on zero-sized arrays very well")

    # Use the fixture's context, as the other tests do, instead of ignoring
    # ctx_factory and calling cl.create_some_context().
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
                "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    # A copy of m is passed in, so m itself keeps the unsorted input;
    # ``index`` is read back afterwards as the sort permutation.
    ms, evt = sorterm(m.copy(), idx=index, axis=0)

    # Read the unsorted input back once rather than once per use.
    m_host = m.get()

    assert np.array_equal(np.sort(m_host), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m_host[np.argsort(m_host)], m_host[index.get()])