当前位置: 首页>>代码示例>>Python>>正文


Python characterize.has_double_support函数代码示例

本文整理汇总了Python中pyopencl.characterize.has_double_support函数的典型用法代码示例。如果您正苦于以下问题:Python has_double_support函数的具体用法?Python has_double_support怎么用?Python has_double_support使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了has_double_support函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_lin_comb_diff

def test_lin_comb_diff(ctx_factory, arg_type):
    ctx = ctx_factory()
    dev, = ctx.devices
    if not has_double_support(dev):
        if arg_type in (np.float64, np.complex128):
            pytest.skip('Device does not support double.')
    n = 100000
    a_np = (np.random.randn(n)).astype(arg_type)
    b_np = (np.random.randn(n)).astype(arg_type)
    c_np = (np.random.randn(n) * 10).astype(arg_type)
    queue = cl.CommandQueue(ctx)

    a_g = cl.array.to_device(queue, a_np)
    b_g = cl.array.to_device(queue, b_np)
    c_g = cl.array.to_device(queue, c_np)
    res_g = cl.array.empty_like(a_g)
    lin_comb_diff = lin_comb_diff_kernel(ctx, arg_type, arg_type,
                                         arg_type, np.float32, 2)
    gs, ls = get_group_sizes(n, dev, lin_comb_diff)

    evt = run_elwise_kernel(lin_comb_diff, queue, gs, ls, n, [],
                            res_g, c_g, a_g, b_g, 2, 3)
    evt.wait()

    # Check on GPU with PyOpenCL Array:
    assert np.linalg.norm((res_g - (c_g + 2 * a_g + 3 * b_g)).get()) <= 2e-4

    # Check on CPU with Numpy:
    res_np = res_g.get()
    assert np.linalg.norm(res_np - (c_np + 2 * a_np + 3 * b_np)) <= 2e-4
开发者ID:yuyichao,项目名称:pyscical,代码行数:30,代码来源:test_elwise.py

示例2: test

    def test(ctx_factory):
        context = ctx_factory()
        queue = cl.CommandQueue(context)

        gpu_func = getattr(clmath, name)
        cpu_func = getattr(np, numpy_func_names.get(name, name))

        if has_double_support(context.devices[0]):
            if use_complex:
                dtypes = [np.float32, np.float64, np.complex64, np.complex128]
            else:
                dtypes = [np.float32, np.float64]
        else:
            if use_complex:
                dtypes = [np.float32, np.complex64]
            else:
                dtypes = [np.float32]

        for s in sizes:
            for dtype in dtypes:
                dtype = np.dtype(dtype)

                args = cl_array.arange(queue, a, b, (b - a) / s, dtype=dtype)
                if dtype.kind == "c":
                    args = args + dtype.type(1j) * args

                gpu_results = gpu_func(args).get()
                cpu_results = cpu_func(args.get())

                my_threshold = threshold
                if dtype.kind == "c" and isinstance(use_complex, float):
                    my_threshold = use_complex

                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= my_threshold).all(), (max_err, name, dtype)
开发者ID:abergeron,项目名称:pyopencl,代码行数:35,代码来源:test_clmath.py

示例3: get_write_kernel

    def get_write_kernel(self, index_dtype):
        index_ctype = dtype_to_ctype(index_dtype)
        from pyopencl.tools import VectorArg, OtherArg

        kernel_list_args = []
        kernel_list_arg_values = ""
        user_list_args = []

        for name, dtype in self.list_names_and_dtypes:
            list_name = "plb_%s_list" % name
            list_arg = VectorArg(dtype, list_name)

            kernel_list_args.append(list_arg)
            user_list_args.append(list_arg)

            if name in self.count_sharing:
                kernel_list_arg_values += "%s, " % list_name
                continue

            kernel_list_args.append(VectorArg(index_dtype, "plb_%s_start_index" % name))

            index_name = "plb_%s_index" % name
            user_list_args.append(OtherArg("%s *%s" % (index_ctype, index_name), index_name))

            kernel_list_arg_values += "%s, &%s, " % (list_name, index_name)

        kernel_name = self.name_prefix + "_write"

        from pyopencl.characterize import has_double_support

        src = _LIST_BUILDER_TEMPLATE.render(
            is_count_stage=False,
            kernel_name=kernel_name,
            double_support=all(has_double_support(dev) for dev in self.context.devices),
            debug=self.debug,
            do_not_vectorize=self.do_not_vectorize(),
            kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
            kernel_list_arg_values=kernel_list_arg_values,
            user_list_arg_decl=_get_arg_decl(user_list_args),
            user_list_args=_get_arg_list(user_list_args),
            user_arg_decl=_get_arg_decl(self.arg_decls),
            user_args=_get_arg_list(self.arg_decls),
            list_names_and_dtypes=self.list_names_and_dtypes,
            count_sharing=self.count_sharing,
            name_prefix=self.name_prefix,
            generate_template=self.generate_template,
            preamble=self.preamble,
            index_type=index_ctype,
        )

        src = str(src)

        prg = cl.Program(self.context, src).build(self.options)
        knl = getattr(prg, kernel_name)

        from pyopencl.tools import get_arg_list_scalar_arg_dtypes

        knl.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(kernel_list_args + self.arg_decls) + [index_dtype])

        return knl
开发者ID:braincorp,项目名称:pyopencl,代码行数:60,代码来源:algorithm.py

示例4: test_astype

def test_astype(ctx_factory):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if not has_double_support(context.devices[0]):
        from pytest import skip
        skip("double precision not supported on %s" % context.devices[0])

    a_gpu = clrand(queue, (2000,), dtype=np.float32)

    a = a_gpu.get().astype(np.float64)
    a2 = a_gpu.astype(np.float64).get()

    assert a2.dtype == np.float64
    assert la.norm(a - a2) == 0, (a, a2)

    a_gpu = clrand(queue, (2000,), dtype=np.float64)

    a = a_gpu.get().astype(np.float32)
    a2 = a_gpu.astype(np.float32).get()

    assert a2.dtype == np.float32
    assert la.norm(a - a2) / la.norm(a) < 1e-7
开发者ID:shinsec,项目名称:pyopencl,代码行数:25,代码来源:test_algorithm.py

示例5: _get_reduction_source

def _get_reduction_source(
        ctx, out_type, out_type_size,
        neutral, reduce_expr, map_expr, parsed_args,
        name="reduce_kernel", preamble="", arg_prep="",
        device=None, max_group_size=None):

    if device is not None:
        devices = [device]
    else:
        devices = ctx.devices

    # {{{ compute group size

    def get_dev_group_size(device):
        # dirty fix for the RV770 boards
        max_work_group_size = device.max_work_group_size
        if "RV770" in device.name:
            max_work_group_size = 64

        # compute lmem limit
        from pytools import div_ceil
        lmem_wg_size = div_ceil(max_work_group_size, out_type_size)
        result = min(max_work_group_size, lmem_wg_size)

        # round down to power of 2
        from pyopencl.tools import bitlog2
        return 2**bitlog2(result)

    group_size = min(get_dev_group_size(dev) for dev in devices)

    if max_group_size is not None:
        group_size = min(max_group_size, group_size)

    # }}}

    from mako.template import Template
    from pytools import all
    from pyopencl.characterize import has_double_support
    src = str(Template(KERNEL).render(
        out_type=out_type,
        arguments=", ".join(arg.declarator() for arg in parsed_args),
        group_size=group_size,
        neutral=neutral,
        reduce_expr=_process_code_for_macro(reduce_expr),
        map_expr=_process_code_for_macro(map_expr),
        name=name,
        preamble=preamble,
        arg_prep=arg_prep,
        double_support=all(has_double_support(dev) for dev in devices),
        ))

    from pytools import Record

    class ReductionInfo(Record):
        pass

    return ReductionInfo(
            context=ctx,
            source=src,
            group_size=group_size)
开发者ID:AI42,项目名称:pyopencl,代码行数:60,代码来源:reduction.py

示例6: test_dot

def test_dot(ctx_factory):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtypes = [np.float32, np.complex64]
    if has_double_support(context.devices[0]):
        dtypes.extend([np.float64, np.complex128])

    for a_dtype in dtypes:
        for b_dtype in dtypes:
            print(a_dtype, b_dtype)
            a_gpu = general_clrand(queue, (200000,), a_dtype)
            a = a_gpu.get()
            b_gpu = general_clrand(queue, (200000,), b_dtype)
            b = b_gpu.get()

            dot_ab = np.dot(a, b)
            dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

            assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4

            vdot_ab = np.vdot(a, b)
            vdot_ab_gpu = cl_array.vdot(a_gpu, b_gpu).get()

            assert abs(vdot_ab_gpu - vdot_ab) / abs(vdot_ab) < 1e-4
开发者ID:shinsec,项目名称:pyopencl,代码行数:28,代码来源:test_algorithm.py

示例7: __init__

    def __init__(self, queue, num_work_items,
            luxury=None, seed=None, no_warmup=False,
            use_legacy_init=False, max_work_items=None):
        if luxury is None:
            luxury = 4

        if seed is None:
            from time import time
            seed = int(time()*1e6) % 2<<30

        self.context = queue.context
        self.luxury = luxury
        self.num_work_items = num_work_items

        from pyopencl.characterize import has_double_support
        self.support_double = has_double_support(queue.device)

        self.no_warmup = no_warmup
        self.use_legacy_init = use_legacy_init
        self.max_work_items = max_work_items

        src = """
            %(defines)s

            #include <pyopencl-ranluxcl.cl>

            kernel void init_ranlux(unsigned seeds, global ranluxcl_state_t *ranluxcltab)
            {
              if (get_global_id(0) < %(num_work_items)d)
                ranluxcl_initialization(seeds, ranluxcltab);
            }
            """ % {
                    "defines": self.generate_settings_defines(),
                    "num_work_items": num_work_items
                }
        prg = cl.Program(queue.context, src).build()

        # {{{ compute work group size

        wg_size = None

        import sys
        import platform
        if ("darwin" in sys.platform
                and "Apple" in queue.device.platform.vendor
                and platform.mac_ver()[0].startswith("10.7")
                and queue.device.type == cl.device_type.CPU):
            wg_size = (1,)

        self.wg_size = wg_size

        # }}}

        self.state = cl_array.empty(queue, (num_work_items, 112), dtype=np.uint8)
        self.state.fill(17)

        prg.init_ranlux(queue, (num_work_items,), self.wg_size, np.uint32(seed),
                self.state.data)
开发者ID:EyNuel,项目名称:pyopencl,代码行数:58,代码来源:clrandom.py

示例8: get_count_kernel

    def get_count_kernel(self, index_dtype):
        index_ctype = dtype_to_ctype(index_dtype)
        from pyopencl.tools import VectorArg, OtherArg
        kernel_list_args = [
                VectorArg(index_dtype, "plb_%s_count" % name)
                for name, dtype in self.list_names_and_dtypes
                if name not in self.count_sharing]

        user_list_args = []
        for name, dtype in self.list_names_and_dtypes:
            if name in self.count_sharing:
                continue

            name = "plb_loc_%s_count" % name
            user_list_args.append(OtherArg("%s *%s" % (
                index_ctype, name), name))

        kernel_name = self.name_prefix+"_count"

        from pyopencl.characterize import has_double_support
        src = _LIST_BUILDER_TEMPLATE.render(
                is_count_stage=True,
                kernel_name=kernel_name,
                double_support=all(has_double_support(dev) for dev in
                    self.context.devices),
                debug=self.debug,
                do_not_vectorize=self.do_not_vectorize(),
                eliminate_empty_output_lists=self.eliminate_empty_output_lists,

                kernel_list_arg_decl=_get_arg_decl(kernel_list_args),
                kernel_list_arg_values=_get_arg_list(user_list_args, prefix="&"),
                user_list_arg_decl=_get_arg_decl(user_list_args),
                user_list_args=_get_arg_list(user_list_args),
                user_arg_decl_with_offset=_get_arg_decl(self.arg_decls),
                user_arg_decl_no_offset=_get_arg_decl(self.arg_decls_no_offset),
                user_args_no_offset=_get_arg_list(self.arg_decls_no_offset),
                arg_offset_adjustment=get_arg_offset_adjuster_code(self.arg_decls),

                list_names_and_dtypes=self.list_names_and_dtypes,
                count_sharing=self.count_sharing,
                name_prefix=self.name_prefix,
                generate_template=self.generate_template,
                preamble=self.preamble,

                index_type=index_ctype,
                )

        src = str(src)

        prg = cl.Program(self.context, src).build(self.options)
        knl = getattr(prg, kernel_name)

        from pyopencl.tools import get_arg_list_scalar_arg_dtypes
        knl.set_scalar_arg_dtypes(get_arg_list_scalar_arg_dtypes(
            kernel_list_args+self.arg_decls) + [index_dtype])

        return knl
开发者ID:inducer,项目名称:pyopencl,代码行数:57,代码来源:algorithm.py

示例9: test_get_kernels

def test_get_kernels(ctx_factory, res_type, arg_type, weight_type):
    ctx = ctx_factory()
    dev, = ctx.devices
    if not has_double_support(dev):
        for t in res_type, arg_type, weight_type:
            if t in (np.float64, np.complex128):
                pytest.skip('Device does not support double.')
    for length in range(1, 3):
        lin_comb_kernel(ctx, res_type, arg_type, weight_type, length)
开发者ID:yuyichao,项目名称:pyscical,代码行数:9,代码来源:test_elwise.py

示例10: test_clrandom_dtypes

def test_clrandom_dtypes(ctx_factory, rng_class, dtype):
    cl_ctx = ctx_factory()
    if dtype == np.float64 and not has_double_support(cl_ctx.devices[0]):
        pytest.skip("double precision not supported on this device")
    rng = rng_class(cl_ctx)

    size = 10

    with cl.CommandQueue(cl_ctx) as queue:
        rng.uniform(queue, size, dtype)

        if dtype not in (np.int32, np.int64):
            rng.normal(queue, size, dtype)
开发者ID:inducer,项目名称:pyopencl,代码行数:13,代码来源:test_clrandom.py

示例11: test_hankel_01_complex

def test_hankel_01_complex(ctx_factory, ref_src):
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if not has_double_support(ctx.devices[0]):
        from pytest import skip
        skip("no double precision support--cannot test complex bessel function")

    n = 10**6
    np.random.seed(11)
    z = (
        np.logspace(-5, 2, n)
        * np.exp(1j * 2 * np.pi * np.random.rand(n)))

    def get_err(check, ref):
        return np.max(np.abs(check-ref)) / np.max(np.abs(ref))

    if ref_src == "pyfmmlib":
        pyfmmlib = pytest.importorskip("pyfmmlib")
        h0_ref, h1_ref = pyfmmlib.hank103_vec(z, ifexpon=1)
    elif ref_src == "scipy":
        spec = pytest.importorskip("scipy.special")
        h0_ref = spec.hankel1(0, z)
        h1_ref = spec.hankel1(1, z)

    else:
        raise ValueError("ref_src")

    z_dev = cl_array.to_device(queue, z)

    h0_dev, h1_dev = clmath.hankel_01(z_dev)

    rel_err_h0 = np.abs(h0_dev.get() - h0_ref)/np.abs(h0_ref)
    rel_err_h1 = np.abs(h1_dev.get() - h1_ref)/np.abs(h1_ref)

    max_rel_err_h0 = np.max(rel_err_h0)
    max_rel_err_h1 = np.max(rel_err_h1)

    print("H0", max_rel_err_h0)
    print("H1", max_rel_err_h1)

    assert max_rel_err_h0 < 4e-13
    assert max_rel_err_h1 < 2e-13

    if 0:
        import matplotlib.pyplot as pt
        pt.loglog(np.abs(z), rel_err_h0)
        pt.loglog(np.abs(z), rel_err_h1)
        pt.show()
开发者ID:AI42,项目名称:pyopencl,代码行数:49,代码来源:test_clmath.py

示例12: get_dot_kernel

def get_dot_kernel(ctx, dtype_out, dtype_a=None, dtype_b=None,
        conjugate_first=False):
    from pyopencl.characterize import has_double_support
    map_expr, dtype_out, dtype_b = _get_dot_expr(
            dtype_out, dtype_a, dtype_b, conjugate_first,
            has_double_support=has_double_support(ctx.devices[0]))

    return ReductionKernel(ctx, dtype_out, neutral="0",
            reduce_expr="a+b", map_expr=map_expr,
            arguments=
            "const %(tp_a)s *a, "
            "const %(tp_b)s *b" % {
                "tp_a": dtype_to_ctype(dtype_a),
                "tp_b": dtype_to_ctype(dtype_b),
                })
开发者ID:tsmithe,项目名称:pyopencl,代码行数:15,代码来源:reduction.py

示例13: test_random

def test_random(ctx_getter):
    context = ctx_getter()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import rand as clrand

    if has_double_support(context.devices[0]):
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    for dtype in dtypes:
        a = clrand(context, queue, (10, 100), dtype=dtype).get()

        assert (0 <= a).all()
        assert (a < 1).all()
开发者ID:Almclean,项目名称:pyopencl,代码行数:16,代码来源:test_array.py

示例14: test_random_float_in_range

def test_random_float_in_range(ctx_factory, rng_class, ary_size, plot_hist=False):
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    if has_double_support(context.devices[0]):
        dtypes = [np.float32, np.float64]
    else:
        dtypes = [np.float32]

    if rng_class is RanluxGenerator:
        gen = rng_class(queue, 5120)
    else:
        gen = rng_class(context)

    for dtype in dtypes:
        print(dtype)
        ran = cl_array.zeros(queue, ary_size, dtype)
        gen.fill_uniform(ran)

        if plot_hist:
            import matplotlib.pyplot as pt
            pt.hist(ran.get(), 30)
            pt.show()

        assert (0 <= ran.get()).all()
        assert (ran.get() <= 1).all()

        if rng_class is RanluxGenerator:
            gen.synchronize(queue)

        ran = cl_array.zeros(queue, ary_size, dtype)
        gen.fill_uniform(ran, a=4, b=7)
        ran_host = ran.get()

        for cond in [4 <= ran_host,  ran_host <= 7]:
            good = cond.all()
            if not good:
                print(np.where(~cond))
                print(ran_host[~cond])
            assert good

        ran = gen.normal(queue, ary_size, dtype, mu=10, sigma=3)

        if plot_hist:
            import matplotlib.pyplot as pt
            pt.hist(ran.get(), 30)
            pt.show()
开发者ID:inducer,项目名称:pyopencl,代码行数:47,代码来源:test_array.py

示例15: test_bitonic_argsort

def test_bitonic_argsort(ctx_factory, size, dtype):
    import sys
    is_pypy = '__pypy__' in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                "on zero-sized arrays very well")

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms, evt = sorterm(m.copy(), idx=index, axis=0)

    assert np.array_equal(np.sort(m.get()), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m.get()[np.argsort(m.get())], m.get()[index.get()])
开发者ID:kif,项目名称:pyopencl,代码行数:45,代码来源:test_algorithm.py


注:本文中的pyopencl.characterize.has_double_support函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。