本文整理汇总了Python中cupy.float32方法的典型用法代码示例。如果您正苦于以下问题:Python cupy.float32方法的具体用法?Python cupy.float32怎么用?Python cupy.float32使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块cupy的用法示例。
在下文中一共展示了cupy.float32方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def _call_nms_kernel(bbox, thresh):
assert False, "Not supported."
n_bbox = bbox.shape[0]
threads_per_block = 64
col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
blocks = (col_blocks, col_blocks, 1)
threads = (threads_per_block, 1, 1)
mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
bbox, mask_dev))
mask_host = mask_dev.get()
selection, n_selec = _nms_gpu_post(
mask_host, n_bbox, threads_per_block, col_blocks)
return selection, n_selec
示例2: use_single_gpu
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def use_single_gpu():
    """Pick one GPU, make it the current device, and return its id.

    When ``CUDA_VISIBLE_DEVICES`` is set, the first id listed in it is used.
    Otherwise the first entry returned by ``get_free_gpus()`` is used.

    Returns:
        The id of the GPU that was selected.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible is not None:
        # Handles both a single reserved GPU ("3") and several reserved
        # GPUs ("3,4"): in either case the first listed id is taken.
        gpu_id = int(visible.split(',')[0])
    else:
        # No GPUs are reserved; take a free one.
        gpu_id = get_free_gpus()[0]

    # Use the GPU immediately.  The throwaway allocation presumably forces
    # the device to be initialised right away -- TODO confirm.
    chainer.cuda.get_device_from_id(gpu_id).use()
    cupy.empty((1,), dtype=cupy.float32)
    return gpu_id
示例3: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` CUDA kernel and post-process its mask.

    Args:
        bbox: Box array; ``bbox.shape[0]`` is used as the number of boxes
            and the data is passed to the kernel as contiguous float32.
        thresh: Threshold handed to the kernel as a ``float32`` scalar.

    Returns:
        The ``(selection, n_selec)`` pair returned by ``_nms_gpu_post``.
    """
    # PyTorch has no unsigned-long tensor type.  That does not matter here
    # because an ndarray is returned in the end, so the CuPy code path is
    # kept unmodified.
    block_size = 64
    box_count = bbox.shape[0]
    n_col_blocks = np.ceil(box_count / block_size).astype(np.int32)

    grid_dim = (n_col_blocks, n_col_blocks, 1)
    block_dim = (block_size, 1, 1)

    # One uint64 mask word per (box, column block) pair, filled by the kernel.
    device_mask = cp.zeros((box_count * n_col_blocks,), dtype=np.uint64)
    device_boxes = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms = _load_kernel('nms_kernel', _nms_gpu_code)
    kernel_args = (cp.int32(box_count), cp.float32(thresh),
                   device_boxes, device_mask)
    nms(grid_dim, block_dim, args=kernel_args)

    # Copy the mask back to the host for the CPU-side post-processing.
    host_mask = device_mask.get()
    return _nms_gpu_post(host_mask, box_count, block_size, n_col_blocks)
示例4: test_template_specialization
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_template_specialization(self):
    """Run every specialised kernel from ``test_cxx_template``.

    Each kernel is launched on a random 10-element array and is expected to
    leave that array (approximately) equal to the square of its original
    contents.  The test is skipped for the ``nvcc`` backend.
    """
    if self.backend == 'nvcc':
        self.skipTest('nvcc does not support template specialization')

    # Each name expression is paired with the dtype its kernel is run on.
    specializations = (
        ('my_sqrt<int>', cupy.int32),
        ('my_sqrt<float>', cupy.float32),
        ('my_sqrt<complex<double>>', cupy.complex128),
        ('my_func', cupy.float64),
    )
    name_expressions = [name for name, _ in specializations]

    # Compile the code.
    mod = cupy.RawModule(
        code=test_cxx_template,
        options=('--std=c++11',),
        name_expressions=name_expressions,
    )

    for kernel_name, dtype in specializations:
        # Get the specialised kernel.
        kernel = mod.get_function(kernel_name)

        # Prepare the input and the expected output.
        data = cupy.testing.shaped_random((10,), dtype=dtype)
        expected = data**2

        # Run: one block of ten threads; the kernel writes into ``data``.
        kernel((1,), (10,), (data, 10))

        # Check the result.
        assert cupy.allclose(data, expected)
示例5: test_context_switch_RawModule4
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_context_switch_RawModule4(self):
    """Load a cubin on device 0 and launch its kernel on device 1.

    This runs the ``test_load_cubin()`` scenario on another device.  The
    test is skipped when the two devices report different compute
    capabilities.
    """
    first = cupy.cuda.Device(0)
    second = cupy.cuda.Device(1)
    if first.compute_capability != second.compute_capability:
        raise pytest.skip()

    # Generate the cubin in the temp dir and load it on device 0.
    with first:
        cubin_path = self._generate_file('cubin')
        module = cupy.RawModule(path=cubin_path, backend=self.backend)
        kernel = module.get_function('test_div')

    # In this test, reloading happens at kernel launch.
    with second:
        x1, x2, y = self._helper(kernel, cupy.float32)
        expected = x1 / (x2 + 1.0)
        assert cupy.allclose(y, expected)
示例6: test_manual_indexing
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_manual_indexing(self, n=100):
    """Compare an element-wise add kernel with its ``raw``-indexed twin.

    Two ``ElementwiseKernel`` objects add the same pair of float32 vectors,
    one with implicit indexing and one indexing ``raw`` arguments with
    ``i``; their outputs must be equal.

    Args:
        n: Length of the random input vectors, also passed as ``size`` to
            the raw-indexed kernel.
    """
    lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
    rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

    # The kernel names keep their original spelling ('uesr_...') because
    # they are runtime strings handed to CuPy.
    implicit_add = cupy.ElementwiseKernel(
        'T x, T y', 'T z', '\nz = x + y;\n', 'uesr_kernel_1')
    out1 = implicit_add(lhs, rhs)

    # With only ``raw`` arguments the launch size is passed explicitly.
    indexed_add = cupy.ElementwiseKernel(
        'raw T x, raw T y', 'raw T z', '\nz[i] = x[i] + y[i];\n',
        'uesr_kernel_2')
    out2 = indexed_add(lhs, rhs, size=n)

    testing.assert_array_equal(out1, out2)
示例7: test_load_pickle
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_load_pickle(self):
    """Round-trip an array through ``dump``/``cupy.load`` via pickle.

    The dumped bytes must load back to an equal array when
    ``allow_pickle=True`` and must raise ``ValueError`` when
    ``allow_pickle=False``.

    Every in-memory buffer is managed by a ``with`` block so it is closed
    even when one of the assertions fails.
    """
    a = testing.shaped_arange((2, 3, 4), dtype=cupy.float32)

    # Serialise the array and keep the raw bytes for the two load attempts.
    with io.BytesIO() as sio:
        a.dump(sio)
        s = sio.getvalue()

    # Loading with pickle enabled must reproduce the array.
    with io.BytesIO(s) as sio:
        b = cupy.load(sio, allow_pickle=True)
        testing.assert_array_equal(a, b)

    # Loading the same bytes with pickle disabled must be rejected.
    with io.BytesIO(s) as sio:
        with self.assertRaises(ValueError):
            cupy.load(sio, allow_pickle=False)
示例8: test_rfft
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_rfft(self, xp, dtype):
    """Compute ``xp.fft.rfft`` of a random array, using a plan on CuPy.

    Args:
        xp: Array module under test (compared against ``cupy`` and ``np``).
        dtype: dtype of the random input.

    Returns:
        The transform result; for NumPy with float16/float32/complex64
        input it is cast to ``complex64``.
    """
    a = testing.shaped_random(self.shape, xp, dtype)

    def transform():
        return xp.fft.rfft(a, n=self.n, norm=self.norm)

    if xp is not cupy:
        out = transform()
    else:
        from cupyx.scipy.fftpack import get_fft_plan
        shape = None if self.n is None else (self.n,)
        plan = get_fft_plan(a, shape=shape, value_type='R2C')
        assert isinstance(plan, cupy.cuda.cufft.Plan1d)
        # Run the transform with the explicitly created plan active.
        with plan:
            out = transform()

    # Presumably this keeps NumPy's result dtype in line with what CuPy
    # produces for low-precision input -- TODO confirm.
    low_precision = [np.float16, np.float32, np.complex64]
    if xp is np and dtype in low_precision:
        out = out.astype(np.complex64)
    return out
示例9: test_irfft
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_irfft(self, xp, dtype):
    """Compute ``xp.fft.irfft`` of a random array, using a plan on CuPy.

    Args:
        xp: Array module under test (compared against ``cupy`` and ``np``).
        dtype: dtype of the random input.

    Returns:
        The transform result; for NumPy with float16/float32/complex64
        input it is cast to ``float32``.
    """
    a = testing.shaped_random(self.shape, xp, dtype)
    fft_kwargs = dict(n=self.n, norm=self.norm)

    if xp is cupy:
        from cupyx.scipy.fftpack import get_fft_plan
        if self.n is None:
            shape = None
        else:
            shape = (self.n,)
        plan = get_fft_plan(a, shape=shape, value_type='C2R')
        assert isinstance(plan, cupy.cuda.cufft.Plan1d)
        # Run the transform with the explicitly created plan active.
        with plan:
            out = xp.fft.irfft(a, **fft_kwargs)
    else:
        out = xp.fft.irfft(a, **fft_kwargs)

    # Presumably this keeps NumPy's result dtype in line with what CuPy
    # produces for low-precision input -- TODO confirm.
    needs_downcast = (
        xp is np and dtype in [np.float16, np.float32, np.complex64])
    if needs_downcast:
        out = out.astype(np.float32)
    return out
示例10: test_irfft2
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_irfft2(self, xp, dtype, order, enable_nd):
    """Compute ``xp.fft.irfft2`` of a random array.

    Args:
        xp: Array module under test.
        dtype: dtype of the random input.
        order: Memory order; ``'F'`` converts the input with
            ``xp.asfortranarray``.
        enable_nd: Expected value of ``config.enable_nd_planning``.

    Returns:
        The transform result; for NumPy with float16/float32/complex64
        input it is cast to ``float32``.

    Raises:
        unittest.SkipTest: For runtime versions 10010..10020 on devices
            with compute capability below 70 when the last transformed
            axis has size 2.
    """
    assert config.enable_nd_planning == enable_nd

    # The conditions are evaluated left to right and short-circuit, exactly
    # as in a single chained ``and`` expression.
    runtime_version = cupy.cuda.runtime.runtimeGetVersion()
    hits_cufft_issue = (
        10010 <= runtime_version <= 10020
        and int(cupy.cuda.device.get_compute_capability()) < 70
        and _size_last_transform_axis(self.shape, self.s, self.axes) == 2
    )
    if hits_cufft_issue:
        raise unittest.SkipTest('work-around for cuFFT issue')

    a = testing.shaped_random(self.shape, xp, dtype)
    if order == 'F':
        a = xp.asfortranarray(a)

    out = xp.fft.irfft2(a, s=self.s, axes=self.axes, norm=self.norm)

    low_precision = [np.float16, np.float32, np.complex64]
    if xp is np and dtype in low_precision:
        out = out.astype(np.float32)
    return out
示例11: test_irfftn
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_irfftn(self, xp, dtype, order, enable_nd):
    """Compute ``xp.fft.irfftn`` of a random array.

    Args:
        xp: Array module under test.
        dtype: dtype of the random input.
        order: Memory order; ``'F'`` converts the input with
            ``xp.asfortranarray``.
        enable_nd: Expected value of ``config.enable_nd_planning``.

    Returns:
        The transform result; for NumPy with float16/float32/complex64
        input it is cast to ``float32``.

    Raises:
        unittest.SkipTest: For runtime versions 10010..10020 on devices
            with compute capability below 70 when the last transformed
            axis has size 2.
    """
    assert config.enable_nd_planning == enable_nd

    # Nested checks keep the evaluation order and short-circuiting of a
    # single chained ``and`` expression.
    cuda_runtime = cupy.cuda.runtime.runtimeGetVersion()
    if 10010 <= cuda_runtime <= 10020:
        capability = int(cupy.cuda.device.get_compute_capability())
        if capability < 70:
            last_axis_size = _size_last_transform_axis(
                self.shape, self.s, self.axes)
            if last_axis_size == 2:
                raise unittest.SkipTest('work-around for cuFFT issue')

    a = testing.shaped_random(self.shape, xp, dtype)
    if order == 'F':
        a = xp.asfortranarray(a)

    out = xp.fft.irfftn(a, s=self.s, axes=self.axes, norm=self.norm)

    if xp is np and dtype in [np.float16, np.float32, np.complex64]:
        return out.astype(np.float32)
    return out
# Only those tests in which a legit plan can be obtained are kept
示例12: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def _call_nms_kernel(bbox, thresh):
    """Launch ``nms_kernel`` through ``cp.RawKernel`` and post-process it.

    Args:
        bbox: Box array; ``bbox.shape[0]`` is used as the number of boxes
            and the data is passed to the kernel as contiguous float32.
        thresh: Threshold handed to the kernel as a ``float32`` scalar.

    Returns:
        The ``(selection, n_selec)`` pair returned by ``_nms_gpu_post``.
    """
    n_boxes = bbox.shape[0]
    tpb = 64  # threads per block
    n_blocks = np.ceil(n_boxes / tpb).astype(np.int32)

    # Kernel output: one uint64 mask word per (box, column block) pair.
    mask_gpu = cp.zeros((n_boxes * n_blocks,), dtype=np.uint64)
    boxes_gpu = cp.ascontiguousarray(bbox, dtype=np.float32)

    kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel(
        (n_blocks, n_blocks, 1),
        (tpb, 1, 1),
        args=(cp.int32(n_boxes), cp.float32(thresh), boxes_gpu, mask_gpu),
    )

    # Bring the mask to the host and reduce it to the selected boxes.
    mask_cpu = mask_gpu.get()
    selection, n_selec = _nms_gpu_post(mask_cpu, n_boxes, tpb, n_blocks)
    return selection, n_selec
示例13: test_06
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_06(self):
    """Solve a small ``ConvBPDN`` problem with ``DataType`` float32.

    After ``solve()`` the solver's ``X``, ``Y`` and ``U`` attributes must
    all have the requested dtype.
    """
    signal_size, filter_size, n_signals, n_filters = 16, 5, 2, 4
    D = cp.random.randn(filter_size, filter_size, n_filters)
    s = cp.random.randn(signal_size, signal_size, n_signals)

    dt = cp.float32
    settings = {
        'Verbose': False,
        'MaxMainIter': 20,
        'AutoRho': {'Enabled': True},
        'DataType': dt,
    }
    opt = cbpdn.ConvBPDN.Options(settings)

    solver = cbpdn.ConvBPDN(D, s, 1e-1, opt=opt)
    solver.solve()

    # Attributes are looked up one at a time, so a failure stops at the
    # first mismatching variable.
    for attr in ('X', 'Y', 'U'):
        assert getattr(solver, attr).dtype == dt
示例14: test_15
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_15(self):
    """Solve a small ``ConvBPDNJoint`` problem with ``DataType`` float32.

    After ``solve()`` the solver's ``X``, ``Y`` and ``U`` attributes must
    all have the requested dtype.
    """
    signal_size, filter_size, n_signals, n_filters = 16, 5, 2, 4
    D = cp.random.randn(filter_size, filter_size, n_filters)
    s = cp.random.randn(signal_size, signal_size, n_signals)

    dt = cp.float32
    settings = {
        'Verbose': False,
        'MaxMainIter': 20,
        'AutoRho': {'Enabled': True},
        'DataType': dt,
    }
    opt = cbpdn.ConvBPDNJoint.Options(settings)

    lmbda, mu = 1e-1, 1e-2
    solver = cbpdn.ConvBPDNJoint(D, s, lmbda, mu, opt=opt)
    solver.solve()

    # Attributes are looked up one at a time, so a failure stops at the
    # first mismatching variable.
    for attr in ('X', 'Y', 'U'):
        assert getattr(solver, attr).dtype == dt
示例15: test_17
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import float32 [as 别名]
def test_17(self):
    """Solve a small ``ConvElasticNet`` problem with ``DataType`` float32.

    The options also turn on ``LinSolveCheck``.  After ``solve()`` the
    solver's ``X``, ``Y`` and ``U`` attributes must all have the requested
    dtype.
    """
    signal_size, filter_size, n_signals, n_filters = 16, 5, 2, 4
    D = cp.random.randn(filter_size, filter_size, n_filters)
    s = cp.random.randn(signal_size, signal_size, n_signals)

    dt = cp.float32
    settings = {
        'Verbose': False,
        'LinSolveCheck': True,
        'MaxMainIter': 20,
        'AutoRho': {'Enabled': True},
        'DataType': dt,
    }
    opt = cbpdn.ConvElasticNet.Options(settings)

    lmbda, mu = 1e-1, 1e-2
    solver = cbpdn.ConvElasticNet(D, s, lmbda, mu, opt=opt)
    solver.solve()

    # Attributes are looked up one at a time, so a failure stops at the
    # first mismatching variable.
    for attr in ('X', 'Y', 'U'):
        assert getattr(solver, attr).dtype == dt