Python cuda.jit函数代码示例

本文整理汇总了Python中numba.cuda.jit函数的典型用法代码示例。如果您正苦于以下问题：Python cuda.jit函数的具体用法？Python cuda.jit怎么用？Python cuda.jit使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了cuda.jit函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_slice_as_arg

    def test_slice_as_arg(self):
        """Compile ``foo`` and ``copy`` for CUDA and launch the copy kernel.

        ``foo`` is compiled as a device function and stored in the
        module-level name ``cufoo``; ``copy`` is compiled as a kernel taking
        two 2-D int32 arrays. The kernel is launched with 1 block of 10
        threads on a 10x10 input and a zeroed output of the same shape.
        No assertion is made on the output: the test passes when compilation
        and the launch complete without raising.
        """
        global cufoo
        # Keep this assignment ahead of the kernel compilation below;
        # presumably ``copy`` looks up the global ``cufoo`` -- confirm.
        cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo)
        copy_kernel = cuda.jit("void(int32[:,:], int32[:,:])")(copy)

        source = np.arange(100, dtype=np.int32).reshape(10, 10)
        target = np.zeros_like(source)

        copy_kernel[1, 10](source, target)

开发者ID:ASPP，项目名称:numba，代码行数:9，代码来源:test_slicing.py

示例2: test_exception

    def test_exception(self):
        """Check that the debug build of ``foo`` raises ``IndexError``.

        ``foo`` is compiled twice, once with default options and once with
        ``debug=True``. When the simulator is disabled the default build is
        launched and is expected not to raise. The debug build must raise an
        ``IndexError`` whose message contains "tuple index out of range".
        """
        unchecked_kernel = cuda.jit(foo)
        checked_kernel = cuda.jit(debug=True)(foo)

        # The simulator throws exceptions regardless of the debug setting,
        # so the non-debug launch only runs when the simulator is off.
        if not config.ENABLE_CUDASIM:
            unchecked_kernel[1, 2](numpy.array([0, 1]))

        with self.assertRaises(IndexError) as caught:
            checked_kernel[1, 2](numpy.array([0, 1]))
        self.assertIn("tuple index out of range", str(caught.exception))

开发者ID:GaZ3ll3，项目名称:numba，代码行数:12，代码来源:test_exception.py

示例3: test_kernel

    def test_kernel(self):
        """Check that ``fastmath=True`` changes the divide emitted for a kernel.

        The same kernel is compiled with and without ``fastmath``. The
        instruction ``div.full.ftz.f32`` must appear in the PTX of the
        fastmath build and must be absent from the PTX of the default build.
        """
        signature = "void(float32[:], float32)"

        def foo(arr, val):
            idx = cuda.grid(1)
            if idx < arr.size:
                arr[idx] = float32(idx) / val

        fast_kernel = cuda.jit(signature, fastmath=True)(foo)
        precise_kernel = cuda.jit(signature)(foo)

        fast_divide = 'div.full.ftz.f32'
        self.assertIn(fast_divide, fast_kernel.ptx)
        self.assertNotIn(fast_divide, precise_kernel.ptx)

开发者ID:Alexhuszagh，项目名称:numba，代码行数:12，代码来源:test_fastmath.py

示例4: test_device

    def test_device(self):
        """Check the fastmath divide when the division is in a device function.

        The division is done by the device function ``foo``, which is
        compiled without ``fastmath``. The calling kernel ``bar`` is compiled
        with and without ``fastmath``. ``div.full.ftz.f32`` must appear in
        the PTX of the fastmath kernel only.
        """
        kernel_signature = "void(float32[:], float32)"

        # fastmath option is ignored for device function
        @cuda.jit("float32(float32, float32)", device=True)
        def foo(a, b):
            return a / b

        def bar(arr, val):
            idx = cuda.grid(1)
            if idx < arr.size:
                arr[idx] = foo(idx, val)

        fast_kernel = cuda.jit(kernel_signature, fastmath=True)(bar)
        precise_kernel = cuda.jit(kernel_signature)(bar)

        fast_divide = 'div.full.ftz.f32'
        self.assertIn(fast_divide, fast_kernel.ptx)
        self.assertNotIn(fast_divide, precise_kernel.ptx)

开发者ID:Alexhuszagh，项目名称:numba，代码行数:16，代码来源:test_fastmath.py

示例5: test_simple_grid1d

 def test_simple_grid1d(self):
     """Check the output of the ``simple_grid1d`` kernel on a 1-D launch.

     The kernel is compiled for a contiguous 1-D int32 array and launched
     with 7 blocks of 3 threads on a 21-element array. Afterwards the
     array must equal ``np.arange(21)``.
     """
     compiled = cuda.jit("void(int32[::1])")(simple_grid1d)
     ntid, nctaid = 3, 7
     nelem = ntid * nctaid
     ary = np.empty(nelem, dtype=np.int32)
     compiled[nctaid, ntid](ary)
     # assert_array_equal reports which elements differ when the check
     # fails, instead of a bare "False is not true".
     np.testing.assert_array_equal(ary, np.arange(nelem))

开发者ID:ASPP，项目名称:numba，代码行数:7，代码来源:test_intrinsics.py

示例6: test_fill_threadidx

 def test_fill_threadidx(self):
     """Check the output of the ``fill_threadidx`` kernel.

     The kernel is launched with 1 block of ``N`` threads on an array of
     ones. Afterwards the array must equal ``np.arange(N)``.
     """
     compiled = cuda.jit("void(int32[:])")(fill_threadidx)
     N = 10
     ary = np.ones(N, dtype=np.int32)
     exp = np.arange(N, dtype=np.int32)
     compiled[1, N](ary)
     # assert_array_equal reports which elements differ when the check
     # fails, instead of a bare "False is not true".
     np.testing.assert_array_equal(ary, exp)

开发者ID:ASPP，项目名称:numba，代码行数:7，代码来源:test_intrinsics.py

示例7: test_const_record_align

    def test_const_record_align(self):
        """Check PTX loads and results of the ``cuconstRecAlign`` kernel.

        The kernel is specialized for five 2-element float64 output arrays.
        When the simulator is disabled, the PTX must contain three specific
        ``ld.const`` instructions. The kernel is then launched with 2 blocks
        of 1 thread, and each output array is compared against the matching
        field of ``CONST_RECORD_ALIGN``.
        """
        outputs = [np.zeros(2, dtype=np.float64) for _ in range(5)]
        jcuconst = cuda.jit(cuconstRecAlign).specialize(*outputs)

        if not ENABLE_CUDASIM:
            # (instruction expected in the PTX, failure message)
            expected_loads = (
                ('ld.const.v4.u8', 'load the first three bytes as a vector'),
                ('ld.const.u32', 'load the uint32 natively'),
                ('ld.const.u8', 'load the last byte by itself'),
            )
            for instruction, reason in expected_loads:
                self.assertIn(instruction, jcuconst.ptx, reason)

        jcuconst[2, 1](*outputs)
        # Output arrays map to record fields in argument order.
        for result, field in zip(outputs, ('a', 'b', 'x', 'y', 'z')):
            np.testing.assert_allclose(result, CONST_RECORD_ALIGN[field])

开发者ID:esc，项目名称:numba，代码行数:30，代码来源:test_constmem.py

示例8: test_useless_sync

 def test_useless_sync(self):
     """Check the output of the ``useless_sync`` kernel.

     The kernel is compiled for a contiguous 1-D int32 array and launched
     with 1 block of ``nelem`` threads. Afterwards the array must equal
     ``np.arange(nelem)``.
     """
     compiled = cuda.jit("void(int32[::1])")(useless_sync)
     nelem = 10
     ary = np.empty(nelem, dtype=np.int32)
     exp = np.arange(nelem, dtype=np.int32)
     compiled[1, nelem](ary)
     # assert_array_equal reports which elements differ when the check
     # fails, instead of a bare "False is not true".
     np.testing.assert_array_equal(ary, exp)

开发者ID:Alexhuszagh，项目名称:numba，代码行数:7，代码来源:test_sync.py

示例9: test_atomic_add3

    def test_atomic_add3(self):
        """Check that the ``atomic_add3`` kernel increments every element by 1.

        A random 4x8 uint32 array is passed to the kernel, launched with
        1 block of (4, 8) threads. Afterwards the array must equal the
        original values plus one.
        """
        ary = np.random.randint(0, 32, size=32).astype(np.uint32).reshape(4, 8)
        orig = ary.copy()
        cuda_atomic_add3 = cuda.jit('void(uint32[:,:])')(atomic_add3)
        cuda_atomic_add3[1, (4, 8)](ary)

        # np.testing.assert_equal reports the differing elements on failure
        # and matches the assertion style of the other atomics tests.
        np.testing.assert_equal(ary, orig + 1)

开发者ID:MJJoyce，项目名称:numba，代码行数:7，代码来源:test_atomics.py

示例10: test_boolean

 def test_boolean(self):
     """Check that ``boolean_func`` writes a value chosen by its bool argument.

     The kernel is launched with a single thread on a 1-element float64
     array. The element must be 123 after a call with ``True`` and 321
     after a call with ``False``.
     """
     func = cuda.jit('void(float64[:], bool_)')(boolean_func)
     A = np.array([0], dtype='float64')
     # Launch with an explicit [1, 1] configuration: newer Numba releases
     # reject a kernel call that has no launch configuration.
     func[1, 1](A, True)
     self.assertEqual(A[0], 123)
     func[1, 1](A, False)
     self.assertEqual(A[0], 321)

开发者ID:cpcloud，项目名称:numba，代码行数:7，代码来源:test_boolean.py

示例11: test_atomic_add_double_global_3

    def test_atomic_add_double_global_3(self):
        """Check that ``atomic_add_double_global_3`` adds 1 to every element.

        A random 4x8 float64 array is passed to the kernel, launched with
        1 block of (4, 8) threads. Afterwards the array must equal the
        original values plus one.
        """
        kernel = cuda.jit('void(float64[:,:])')(atomic_add_double_global_3)

        values = np.random.randint(0, 32, size=32)
        values = values.astype(np.float64).reshape(4, 8)
        snapshot = values.copy()

        kernel[1, (4, 8)](values)

        np.testing.assert_equal(values, snapshot + 1)

开发者ID:cpcloud，项目名称:numba，代码行数:7，代码来源:test_atomics.py

示例12: test_printfloat

 def test_printfloat(self):
     """Check the text printed by the ``printfloat`` kernel.

     The kernel is compiled with ``debug=False`` and launched with a
     single thread while CUDA stdout is captured. The captured text must
     be one of the two accepted renderings of the float value.
     """
     jprintfloat = cuda.jit('void()', debug=False)(printfloat)
     with captured_cuda_stdout() as stdout:
         # Launch with an explicit [1, 1] configuration: newer Numba
         # releases reject a kernel call that has no launch configuration.
         jprintfloat[1, 1]()
     # CUDA and the simulator use different formats for float formatting
     self.assertIn(stdout.getvalue(), ["0 23 34.750000 321\n",
                                       "0 23 34.75 321\n"])

开发者ID:Alexhuszagh，项目名称:numba，代码行数:7，代码来源:test_print.py

示例13: check_atomic_max

 def check_atomic_max(self, dtype, lo, hi):
     """Run the ``atomic_max`` kernel on random data and compare with NumPy.

     A 32x32 array is drawn with ``np.random.randint(lo, hi)`` and cast to
     ``dtype``. The kernel is launched with 32 blocks of 32 threads and a
     zero-initialised 1-element result array of the same dtype. The result
     must equal the NumPy maximum of the values.
     """
     samples = np.random.randint(lo, hi, size=(32, 32)).astype(dtype)
     result = np.zeros(1, dtype=samples.dtype)

     kernel = cuda.jit(atomic_max)
     kernel[32, 32](result, samples)

     np.testing.assert_equal(result, np.max(samples))

开发者ID:cpcloud，项目名称:numba，代码行数:7，代码来源:test_atomics.py

示例14: test_local_array

 def test_local_array(self):
     """Check PTX and output of the ``culocal`` kernel.

     The PTX must contain ``.local``. The kernel is then launched with a
     single thread on a 100-element int32 input and a zeroed output of the
     same shape; afterwards the output must equal the input.
     """
     jculocal = cuda.jit('void(int32[:], int32[:])')(culocal)
     self.assertIn('.local', jculocal.ptx)
     A = numpy.arange(100, dtype='int32')
     B = numpy.zeros_like(A)
     # Launch with an explicit [1, 1] configuration: newer Numba releases
     # reject a kernel call that has no launch configuration.
     jculocal[1, 1](A, B)
     # assert_array_equal reports which elements differ when the check
     # fails, instead of a bare "False is not true".
     numpy.testing.assert_array_equal(A, B)

开发者ID:PierreBizouard，项目名称:numba，代码行数:7，代码来源:test_localmem.py

示例15: test_const_record

    def test_const_record(self):
        """Check PTX loads and results of the ``cuconstRec`` kernel.

        The kernel is specialized for a 2-element float array and a
        2-element int array. When the simulator is disabled, the PTX must
        contain at least one of the accepted ``ld.const`` instructions. The
        kernel is then launched with 2 blocks of 1 thread, and the outputs
        are compared against the ``x`` and ``y`` fields of ``CONST_RECORD``.
        """
        A = np.zeros(2, dtype=float)
        B = np.zeros(2, dtype=int)
        jcuconst = cuda.jit(cuconstRec).specialize(A, B)

        if not ENABLE_CUDASIM:
            accepted_loads = [
                # a vector load: the compiler fuses the load
                # of the x and y fields into a single instruction!
                'ld.const.v2.u64',

                # for some reason Win64 / Py3 / CUDA 9.1 decides
                # to do two u32 loads, and shifts and ors the
                # values to get the float `x` field, then uses
                # another ld.const.u32 to load the int `y` as
                # a 32-bit value!
                'ld.const.u32',
            ]
            found = any(load in jcuconst.ptx for load in accepted_loads)
            if not found:
                raise AssertionError(
                    "the compiler should realise it doesn't "
                    "need to interpret the bytes as float!")

        jcuconst[2, 1](A, B)
        for result, field in ((A, 'x'), (B, 'y')):
            np.testing.assert_allclose(result, CONST_RECORD[field])

开发者ID:esc，项目名称:numba，代码行数:25，代码来源:test_constmem.py

注：本文中的numba.cuda.jit函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。