本文整理汇总了Python中numba.cuda.jit函数的典型用法代码示例。如果您正苦于以下问题:Python jit函数的具体用法?Python jit怎么用?Python jit使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了jit函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_slice_as_arg
def test_slice_as_arg(self):
    """Compile and launch a kernel together with a device function.

    ``foo`` is compiled as a device function and published under the
    module-level name ``cufoo``; ``copy`` is compiled as a kernel taking
    two 2-D int32 arrays. Presumably ``copy`` looks up ``cufoo`` and passes
    it array slices -- confirm against the definition of ``copy``.

    No assertion is made on the output array: the test passes as long as
    compilation and the launch do not raise.
    """
    global cufoo
    cufoo = cuda.jit("void(int32[:], int32[:])", device=True)(foo)
    copy_kernel = cuda.jit("void(int32[:,:], int32[:,:])")(copy)

    source = np.arange(100, dtype=np.int32).reshape(10, 10)
    destination = np.zeros_like(source)

    # Launch configuration: 1 block of 10 threads.
    copy_kernel[1, 10](source, destination)
示例2: test_exception
def test_exception(self):
    """Check that a debug-compiled kernel raises ``IndexError``.

    ``foo`` is compiled twice: once with default options and once with
    ``debug=True``. When not running under the simulator, the default
    build is launched first. The debug build must then raise an
    ``IndexError`` whose message contains "tuple index out of range".
    """
    unchecked_kernel = cuda.jit(foo)
    checked_kernel = cuda.jit(debug=True)(foo)

    on_real_device = not config.ENABLE_CUDASIM
    if on_real_device:
        # The simulator throws exceptions regardless of the debug
        # setting, so the non-debug launch only runs on hardware.
        unchecked_kernel[1, 2](numpy.array([0, 1]))

    with self.assertRaises(IndexError) as caught:
        checked_kernel[1, 2](numpy.array([0, 1]))

    message = str(caught.exception)
    self.assertIn("tuple index out of range", message)
示例3: test_kernel
def test_kernel(self):
    """Check that ``fastmath=True`` changes the division emitted in PTX.

    The same kernel is compiled with and without ``fastmath``. The
    fast-math build must contain the ``div.full.ftz.f32`` instruction and
    the default build must not.
    """

    def foo(arr, val):
        # Each in-range thread stores its own index divided by ``val``.
        tid = cuda.grid(1)
        if tid < arr.size:
            arr[tid] = float32(tid) / val

    signature = "void(float32[:], float32)"
    fast_kernel = cuda.jit(signature, fastmath=True)(foo)
    precise_kernel = cuda.jit(signature)(foo)

    fast_division = 'div.full.ftz.f32'
    self.assertIn(fast_division, fast_kernel.ptx)
    self.assertNotIn(fast_division, precise_kernel.ptx)
示例4: test_device
def test_device(self):
    """Check the effect of ``fastmath`` when the division is in a device function.

    ``bar`` delegates the division to the device function ``foo``. The
    kernel is compiled with and without ``fastmath=True``; only the
    fast-math build may contain ``div.full.ftz.f32``.
    """

    # The fastmath option is ignored for device functions.
    @cuda.jit("float32(float32, float32)", device=True)
    def foo(a, b):
        return a / b

    def bar(arr, val):
        tid = cuda.grid(1)
        if tid < arr.size:
            arr[tid] = foo(tid, val)

    signature = "void(float32[:], float32)"
    fast_kernel = cuda.jit(signature, fastmath=True)(bar)
    precise_kernel = cuda.jit(signature)(bar)

    fast_division = 'div.full.ftz.f32'
    self.assertIn(fast_division, fast_kernel.ptx)
    self.assertNotIn(fast_division, precise_kernel.ptx)
示例5: test_simple_grid1d
def test_simple_grid1d(self):
    """Launch ``simple_grid1d`` on 7 blocks of 3 threads and check the result.

    After the launch, every element of the array must equal its index.
    """
    threads_per_block, block_count = 3, 7
    total = threads_per_block * block_count

    kernel = cuda.jit("void(int32[::1])")(simple_grid1d)
    buffer = np.empty(total, dtype=np.int32)
    kernel[block_count, threads_per_block](buffer)

    expected = np.arange(total)
    self.assertTrue((buffer == expected).all())
示例6: test_fill_threadidx
def test_fill_threadidx(self):
    """Launch ``fill_threadidx`` on one block of 10 threads and check the result.

    The array starts filled with ones and must equal ``0..9`` afterwards.
    """
    count = 10
    kernel = cuda.jit("void(int32[:])")(fill_threadidx)

    actual = np.ones(count, dtype=np.int32)
    expected = np.arange(count, dtype=np.int32)

    kernel[1, count](actual)
    self.assertTrue((actual == expected).all())
示例7: test_const_record_align
def test_const_record_align(self):
    """Check PTX loads and output values for ``cuconstRecAlign``.

    The kernel is specialized for five float64 arrays of length 2. When
    not running under the simulator, the PTX must contain three specific
    constant-memory load instructions. After the launch, each array is
    compared with the matching field of ``CONST_RECORD_ALIGN``.
    """
    A, B, C, D, E = (np.zeros(2, dtype=np.float64) for _ in range(5))
    jcuconst = cuda.jit(cuconstRecAlign).specialize(A, B, C, D, E)

    if not ENABLE_CUDASIM:
        expected_loads = (
            ('ld.const.v4.u8', 'load the first three bytes as a vector'),
            ('ld.const.u32', 'load the uint32 natively'),
            ('ld.const.u8', 'load the last byte by itself'),
        )
        for instruction, reason in expected_loads:
            self.assertIn(instruction, jcuconst.ptx, reason)

    jcuconst[2, 1](A, B, C, D, E)

    # Pair each output array with the record field it must match.
    outputs = ((A, 'a'), (B, 'b'), (C, 'x'), (D, 'y'), (E, 'z'))
    for array, field in outputs:
        np.testing.assert_allclose(array, CONST_RECORD_ALIGN[field])
示例8: test_useless_sync
def test_useless_sync(self):
    """Launch ``useless_sync`` on one block of 10 threads and check the result.

    After the launch the array must equal ``0..9``.
    """
    count = 10
    kernel = cuda.jit("void(int32[::1])")(useless_sync)

    actual = np.empty(count, dtype=np.int32)
    expected = np.arange(count, dtype=np.int32)

    kernel[1, count](actual)
    self.assertTrue((actual == expected).all())
示例9: test_atomic_add3
def test_atomic_add3(self):
    """Check that ``atomic_add3`` increments every element of a 4x8 array by one.

    The input is a random uint32 array; the kernel runs as a single block
    of 4x8 threads.
    """
    flat = np.random.randint(0, 32, size=32)
    values = flat.astype(np.uint32).reshape(4, 8)
    before = values.copy()

    kernel = cuda.jit('void(uint32[:,:])')(atomic_add3)
    kernel[1, (4, 8)](values)

    self.assertTrue((values == before + 1).all())
示例10: test_boolean
def test_boolean(self):
    """Check that a boolean kernel argument selects the value written.

    ``boolean_func`` is compiled for a float64 array and a ``bool_`` flag.
    The test expects ``A[0]`` to be 123 after a launch with ``True`` and
    321 after a launch with ``False``.
    """
    func = cuda.jit('void(float64[:], bool_)')(boolean_func)
    A = np.array([0], dtype='float64')

    # Use an explicit one-block, one-thread launch configuration: calling
    # a kernel without ``[griddim, blockdim]`` is rejected by current
    # Numba releases.
    func[1, 1](A, True)
    self.assertEqual(A[0], 123)

    func[1, 1](A, False)
    self.assertEqual(A[0], 321)
示例11: test_atomic_add_double_global_3
def test_atomic_add_double_global_3(self):
    """Check that the kernel increments every element of a 4x8 float64 array.

    The input holds random integer values stored as float64; the kernel
    runs as a single block of 4x8 threads.
    """
    flat = np.random.randint(0, 32, size=32)
    values = flat.astype(np.float64).reshape(4, 8)
    before = values.copy()

    kernel = cuda.jit('void(float64[:,:])')(atomic_add_double_global_3)
    kernel[1, (4, 8)](values)

    expected = before + 1
    np.testing.assert_equal(values, expected)
示例12: test_printfloat
def test_printfloat(self):
    """Check the text printed by the ``printfloat`` kernel.

    The kernel is launched while CUDA stdout is captured. The captured
    text must match one of two accepted renderings, which differ only in
    how the float value is formatted.
    """
    jprintfloat = cuda.jit('void()', debug=False)(printfloat)

    with captured_cuda_stdout() as stdout:
        # Use an explicit one-block, one-thread launch configuration:
        # calling a kernel without ``[griddim, blockdim]`` is rejected by
        # current Numba releases.
        jprintfloat[1, 1]()

    # CUDA and the simulator use different formats for float formatting
    accepted_outputs = ["0 23 34.750000 321\n",
                        "0 23 34.75 321\n"]
    self.assertIn(stdout.getvalue(), accepted_outputs)
示例13: check_atomic_max
def check_atomic_max(self, dtype, lo, hi):
    """Compare the ``atomic_max`` kernel against ``np.max`` on random data.

    Args:
        dtype: Element type the random values are cast to; the result
            buffer uses the same dtype.
        lo: Lower bound passed to ``np.random.randint``.
        hi: Upper bound passed to ``np.random.randint``.

    NOTE(review): the result buffer starts at zero, so this presumably
    relies on callers choosing a range whose maximum is not negative --
    confirm against the callers and ``atomic_max``.
    """
    samples = np.random.randint(lo, hi, size=(32, 32)).astype(dtype)
    result = np.zeros(1, dtype=samples.dtype)

    kernel = cuda.jit(atomic_max)
    kernel[32, 32](result, samples)

    expected = samples.max()
    np.testing.assert_equal(result, expected)
示例14: test_local_array
def test_local_array(self):
    """Check the PTX and the output of the ``culocal`` kernel.

    The PTX of the compiled kernel must contain ``.local``. After the
    launch, the output array ``B`` must equal the input array ``A``.
    """
    jculocal = cuda.jit('void(int32[:], int32[:])')(culocal)
    self.assertIn('.local', jculocal.ptx)

    A = numpy.arange(100, dtype='int32')
    B = numpy.zeros_like(A)

    # Use an explicit one-block, one-thread launch configuration: calling
    # a kernel without ``[griddim, blockdim]`` is rejected by current
    # Numba releases.
    jculocal[1, 1](A, B)
    self.assertTrue(numpy.all(A == B))
示例15: test_const_record
def test_const_record(self):
    """Check PTX loads and output values for ``cuconstRec``.

    The kernel is specialized for a float array and an int array of
    length 2. When not running under the simulator, the PTX must contain
    at least one of the accepted constant-memory load instructions. After
    the launch, the arrays are compared with the ``x`` and ``y`` fields of
    ``CONST_RECORD``.
    """
    A = np.zeros(2, dtype=float)
    B = np.zeros(2, dtype=int)
    jcuconst = cuda.jit(cuconstRec).specialize(A, B)

    if not ENABLE_CUDASIM:
        accepted_loads = (
            # a vector load: the compiler fuses the load
            # of the x and y fields into a single instruction!
            'ld.const.v2.u64',
            # for some reason Win64 / Py3 / CUDA 9.1 decides
            # to do two u32 loads, and shifts and ors the
            # values to get the float `x` field, then uses
            # another ld.const.u32 to load the int `y` as
            # a 32-bit value!
            'ld.const.u32',
        )
        # Stop at the first accepted instruction found in the PTX.
        found = False
        for instruction in accepted_loads:
            if instruction in jcuconst.ptx:
                found = True
                break
        if not found:
            raise AssertionError(
                "the compiler should realise it doesn't "
                "need to interpret the bytes as float!")

    jcuconst[2, 1](A, B)

    np.testing.assert_allclose(A, CONST_RECORD['x'])
    np.testing.assert_allclose(B, CONST_RECORD['y'])