本文整理汇总了 Python 中 cupy.ElementwiseKernel 类的典型用法代码示例。如果您正苦于以下问题：Python cupy.ElementwiseKernel 的具体用法？Python cupy.ElementwiseKernel 怎么用？Python cupy.ElementwiseKernel 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 cupy 的用法示例。
在下文中一共展示了 cupy.ElementwiseKernel 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: backward_gpu
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def backward_gpu(self, inputs, gys):
    """Compute the backward pass with a fused CuPy elementwise kernel.

    Delegates to ``self.backward_cpu`` when ``self.gpu_optim`` is falsy.
    Reads ``self.normalized`` and ``self.inv_norm``.
    NOTE(review): presumably both are cached by the forward pass of a
    layer-normalization function -- confirm against the enclosing class.

    Args:
        inputs: Tuple ``(x, gamma, beta)``; ``x`` must be at least 2-D
            because ``x.shape[1]`` is read.
        gys: One-element tuple holding the upstream gradient ``gy``.

    Returns:
        Tuple ``(ga, g_gamma, g_beta)``.
    """
    if not self.gpu_optim:
        return self.backward_cpu(inputs, gys)

    xp = cuda.get_array_module(*inputs)
    x, gamma, _ = inputs
    gy, = gys

    g_beta = xp.sum(gy, axis=0, keepdims=True)
    g_gamma = xp.sum(gy * self.normalized, axis=0, keepdims=True)

    gy2 = gy * gamma
    gy_centered = gy2 - xp.mean(gy2, axis=1, keepdims=True)
    sc_prod = xp.sum(gy_centered * self.normalized, axis=1, keepdims=True)

    # The row width is passed as a float64 kernel argument instead of being
    # %-formatted into the kernel source. Formatting it into the source makes
    # every distinct width a different kernel that has to be compiled; as an
    # argument, one compiled kernel serves all widths. float64 keeps the
    # division in double precision, exactly like the former "%f" literal.
    n_features = float(x.shape[1])
    backprop_scale = cp.ElementwiseKernel(
        'T inv_norm, T gy_centered, T normalized, T sc_prod, '
        'float64 n_features',
        'T z',
        '''
        z = inv_norm * (gy_centered - normalized * (sc_prod / n_features));
        ''',
        'backprop_scale')
    ga = backprop_scale(
        self.inv_norm, gy_centered, self.normalized, sc_prod, n_features)
    return ga, g_gamma, g_beta
示例2: _get_map_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                    integer_output=False):
    """Build the elementwise kernel that samples at ``_get_coord_map`` coords.

    All arguments are forwarded unchanged to ``_generate_interp_custom``,
    which returns the kernel body and the kernel name.

    Returns:
        A ``cupy.ElementwiseKernel`` taking ``raw X x, raw W coords`` and
        writing ``Y y``.
    """
    in_params = 'raw X x, raw W coords'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        # Was 'shift', copy-pasted from _get_shift_kernel. The sibling
        # builders each pass their own label ('shift', 'zoom_shift', 'zoom').
        # NOTE(review): assumes the helper only uses this to build the kernel
        # name -- confirm in _generate_interp_custom.
        name='map',
        integer_output=integer_output,
    )
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
示例3: _get_shift_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                      integer_output=False):
    """Build the elementwise kernel that uses ``_get_coord_shift`` coords.

    All arguments are forwarded unchanged to ``_generate_interp_custom``,
    which returns the kernel body and the kernel name.

    Returns:
        A ``cupy.ElementwiseKernel`` taking ``raw X x, raw W shift`` and
        writing ``Y y``.
    """
    interp_options = dict(
        coord_func=_get_coord_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='shift',
        integer_output=integer_output,
    )
    body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift', 'Y y', body, kernel_name)
示例4: _get_zoom_shift_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                           integer_output=False):
    """Build the elementwise kernel using ``_get_coord_zoom_and_shift``.

    All arguments are forwarded unchanged to ``_generate_interp_custom``,
    which returns the kernel body and the kernel name.

    Returns:
        A ``cupy.ElementwiseKernel`` taking
        ``raw X x, raw W shift, raw W zoom`` and writing ``Y y``.
    """
    interp_options = dict(
        coord_func=_get_coord_zoom_and_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom_shift',
        integer_output=integer_output,
    )
    body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift, raw W zoom', 'Y y', body, kernel_name)
示例5: _get_zoom_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                     integer_output=False):
    """Build the elementwise kernel that uses ``_get_coord_zoom`` coords.

    All arguments are forwarded unchanged to ``_generate_interp_custom``,
    which returns the kernel body and the kernel name.

    Returns:
        A ``cupy.ElementwiseKernel`` taking ``raw X x, raw W zoom`` and
        writing ``Y y``.
    """
    interp_options = dict(
        coord_func=_get_coord_zoom,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom',
        integer_output=integer_output,
    )
    body, kernel_name = _generate_interp_custom(**interp_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W zoom', 'Y y', body, kernel_name)
示例6: _kernel_finalize
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _kernel_finalize():
    """Return the ``cupyx_nd_label_finalize`` elementwise kernel.

    For each index ``i`` the kernel rewrites ``y[i]`` in place: negative
    values become 0; any other value is binary-searched in
    ``labels[0 .. maxlabel - 1]`` and replaced by the index found plus one.
    NOTE(review): the search presumably relies on ``labels`` being sorted
    ascending and containing every non-negative value of ``y`` -- confirm
    against the caller.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    # Kernel source is kept byte-for-byte; `continue` skips to the next
    # element handled by the same thread.
    operation = '''
if (y[i] < 0) {
y[i] = 0;
continue;
}
int yi = y[i];
int j_min = 0;
int j_max = maxlabel - 1;
int j = (j_min + j_max) / 2;
while (j_min < j_max) {
if (yi == labels[j]) break;
if (yi < labels[j]) j_max = j - 1;
else j_min = j + 1;
j = (j_min + j_max) / 2;
}
y[i] = j + 1;
'''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_finalize')
示例7: test_manual_indexing
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def test_manual_indexing(self, n=100):
    """Check that raw (manually indexed) and automatic indexing agree.

    Adds two random float32 vectors of length ``n`` once with an
    automatically indexed kernel and once with a ``raw`` kernel that indexes
    with ``i`` explicitly, then asserts both outputs are equal.
    """
    lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
    rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

    auto_indexed_add = cupy.ElementwiseKernel(
        'T x, T y',
        'T z',
        '''
z = x + y;
''',
        'uesr_kernel_1')
    auto_result = auto_indexed_add(lhs, rhs)

    # With only raw arguments the kernel cannot infer the loop size, so it
    # is passed explicitly through ``size``.
    raw_indexed_add = cupy.ElementwiseKernel(
        'raw T x, raw T y',
        'raw T z',
        '''
z[i] = x[i] + y[i];
''',
        'uesr_kernel_2')
    raw_result = raw_indexed_add(lhs, rhs, size=n)

    testing.assert_array_equal(auto_result, raw_result)
示例8: test_python_scalar
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def test_python_scalar(self):
    """Check that a Python scalar is accepted for a typed kernel argument.

    For each of ``int``, ``float`` and ``bool`` an array of that dtype is
    added to the Python scalar ``typ(2)`` on the GPU and compared with the
    NumPy result of adding ``dtype(2)``.
    """
    # The kernel is generic in T, so one object serves every dtype; building
    # it once avoids re-creating it on each loop iteration.
    add_kernel = cupy.ElementwiseKernel(
        'T x, T y',
        'T z',
        '''
z = x + y;
''',
        'uesr_kernel_1')

    for typ in (int, float, bool):
        dtype = numpy.dtype(typ).type
        # randint's upper bound is exclusive: the former (0, 1) range could
        # only produce zeros, so the inputs were never actually random.
        in1_cpu = numpy.random.randint(0, 2, (4, 5)).astype(dtype)
        in1 = cupy.array(in1_cpu)
        scalar_value = typ(2)
        out1 = add_kernel(in1, scalar_value)
        expected = in1_cpu + dtype(2)
        testing.assert_array_equal(out1, expected)
示例9: __init__
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def __init__(self):
    """Create the ``unpack`` elementwise kernel and store it on the instance.

    For flat output index ``i`` the kernel computes
    ``x = i % matrix_size`` and ``y = i / matrix_size``, swaps them so that
    ``x >= y``, and copies
    ``vec[matrix_size * y - y * (y + 1) / 2 + x]`` into ``mat[i]``.
    NOTE(review): presumably this expands a packed triangular vector into a
    full symmetric matrix -- confirm against the callers.
    """
    in_params = 'raw T vec, int32 matrix_size'
    out_params = 'raw T mat'
    operation = """
int x = i % matrix_size;
int y = i / matrix_size;
if( x < y ) {
int tmp = y;
y = x;
x = tmp;
}
mat[i] = vec[matrix_size * y - y * (y + 1) / 2 + x];
"""
    self.unpack_kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, 'unpack')
示例10: execute
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def execute(cls, ctx, op):
    """Run the elementwise kernel built for ``op`` and store its result.

    The positional arguments for ``cupy.ElementwiseKernel`` come from
    ``_evaluate`` applied to the first output chunk. The kernel is called
    with ``ctx[inp.key]`` for every input of ``op`` and the result is written
    to ``ctx`` under the output chunk's key.
    """
    import cupy as cp

    out_chunk = op.outputs[0]
    kernel_spec = _evaluate(out_chunk)
    kernel = cp.ElementwiseKernel(*kernel_spec)
    operands = [ctx[inp.key] for inp in op.inputs]
    ctx[out_chunk.key] = kernel(*operands)
示例11: test_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def test_kernel(self):
    """Check that a float32 kernel broadcasts (2, 3) with (3,) inputs.

    Only the shape of the result is asserted, not its values.
    """
    import cupy as cp

    lhs = cp.arange(6, dtype='f').reshape(2, 3)
    rhs = cp.arange(3, dtype='f')

    operation = '''if (x - 2 > y) {
z = x * y;
} else {
z = x + y;
}'''
    my_kernel = cp.ElementwiseKernel(
        'float32 x, float32 y', 'float32 z', operation, 'my_kernel')

    result = my_kernel(lhs, rhs)
    self.assertEqual((2, 3), result.shape)
示例12: col2im_gpu
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Accumulate a 6-D column array back into an ``(n, c, h, w)`` image.

    Each output pixel is the sum of every ``col`` entry whose kernel offset
    ``(ky, kx)`` and output position map onto that pixel.

    Args:
        col: Array of shape ``(n, c, kh, kw, out_h, out_w)``.
        sy, sx: Divisors applied to the shifted row/column position; only
            positions divisible by them contribute.
            NOTE(review): presumably the convolution strides -- confirm.
        ph, pw: Offsets added to the pixel row/column.
            NOTE(review): presumably the padding -- confirm.
        h, w: Height and width of the returned image.
        dy, dx: Multipliers of the kernel offsets ``ky``/``kx``.
            NOTE(review): presumably the dilation -- confirm.

    Returns:
        Array of shape ``(n, c, h, w)`` with the dtype of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape
    img = cp.empty((n, c, h, w), dtype=col.dtype)

    col2im_kernel = cp.ElementwiseKernel(
        'raw T col, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dx, int32 dy',
        'T img',
        '''
int c0 = i / (h * w);
int y = i / w % h;
int x = i % w;
T val = 0;
for (int ky = 0; ky < kh; ++ky) {
int out_y = (y + ph - ky * dy);
if (0 > out_y || out_y >= out_h * sy) continue;
if (out_y % sy != 0) continue;
out_y /= sy;
for (int kx = 0; kx < kw; ++kx) {
int out_x = (x + pw - kx * dx);
if (0 > out_x || out_x >= out_w * sx) continue;
if (out_x % sx != 0) continue;
out_x /= sx;
int k = out_y + out_h * (kx + kw * (ky + kh * c0));
val = val + col[out_x + out_w * k];
}
}
img = val;
''',
        'col2im')

    # Scalars are passed in the order the kernel declares them (dx before
    # dy); ``img`` is the output and is filled in place.
    col2im_kernel(
        col.reduced_view(),
        h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy,
        img)
    return img
示例13: elementwise
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def elementwise(in_params, out_params, operation, name, **kwargs):
    """Create an elementwise kernel after checking that CUDA is available.

    The arguments are the same as those for
    :class:`cupy.ElementwiseKernel`, except that the ``name`` argument is
    mandatory.

    NOTE(review): the previous docstring said the kernel object is cached
    per argument combination and CUDA device through
    :func:`~chainer.backends.cuda.memoize`. No decorator is visible on this
    definition in this excerpt -- confirm that the caching is still applied.
    """
    check_cuda_available()
    kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, name, **kwargs)
    return kernel
示例14: get_label_lengths
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def get_label_lengths(self, labels):
    """Return, for each row of ``labels``, the index of its first blank.

    A row that contains no ``self.blank_symbol`` gets length 0 on both the
    NumPy and the CuPy path. Previously the CuPy kernel never wrote its
    output for such rows, leaving uninitialised memory in the result.
    NOTE(review): 0 matches the existing NumPy behaviour; confirm whether
    the row width would be the more useful value for blank-free rows.

    Args:
        labels: Object exposing a 2-D array as ``labels.data``; the CuPy
            path also reads ``labels.shape[1]`` and ``len(labels)``.

    Returns:
        1-D array with one entry per row. The NumPy path returns the default
        dtype of ``numpy.zeros``; the CuPy path returns the dtype of
        ``labels.data``.
    """
    if self.xp is numpy:
        data = labels.data
        label_lengths = self.xp.zeros(len(data))
        for i, row in enumerate(data):
            for j, value in enumerate(row):
                if value == self.blank_symbol:
                    label_lengths[i] = j
                    break
        return label_lengths

    import cupy

    label_length_kernel = cupy.ElementwiseKernel(
        'raw T labels, int32 blank_symbol, int32 num_labels',
        'T length',
        '''
        // Default for rows without a blank; the output buffer is otherwise
        // left unwritten for them.
        length = 0;
        for (int j = 0; j < num_labels; ++j) {
            T label_value = labels[i * num_labels + j];
            if (label_value == blank_symbol) {
                length = j;
                break;
            }
        }
        ''',
        'get_label_lengths'
    )
    # ``labels`` is a raw argument, so the number of rows to process has to
    # be given explicitly through ``size``.
    return label_length_kernel(
        labels.data, self.blank_symbol, labels.shape[1], size=len(labels))
示例15: _call_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ElementwiseKernel [as 别名]
def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=cupy.float64, structure_dtype=cupy.float64):
    """Invoke a constructed ElementwiseKernel on an input image.

    The kernel is called with, in order: the input, the weights (if given),
    the structure (if given) and the output array.

    ``weights`` and ``structure`` may be None, in which case they are not
    passed to the kernel at all. Otherwise each is made contiguous and
    converted to ``weights_dtype`` / ``structure_dtype`` (float64 by
    default, matching SciPy's output; pass ``weights.dtype`` as
    ``weights_dtype`` to skip the conversion). The input and output are
    never converted.

    ``output`` is resolved through ``_get_output`` so that it is allocated
    and appropriately shaped. If the resolved output may overlap the input
    in memory, the kernel writes into a temporary array that is copied into
    the real output afterwards.

    Returns:
        The output array.
    """
    args = [input]
    optional_operands = (
        (weights, weights_dtype),
        (structure, structure_dtype),
    )
    for operand, dtype in optional_operands:
        if operand is not None:
            args.append(cupy.ascontiguousarray(operand, dtype))

    output = _get_output(output, input)
    if not cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS'):
        kernel(*args, output)
        return output

    # Input and output may overlap: compute into scratch space, then copy
    # the result into the caller-visible output.
    scratch = _get_output(output.dtype, input)
    kernel(*args, scratch)
    output[...] = scratch[...]
    return output