本文整理匯總了Python中cupy.ElementwiseKernel方法的典型用法代碼示例。如果您正苦於以下問題:Python cupy.ElementwiseKernel方法的具體用法?Python cupy.ElementwiseKernel怎麽用?Python cupy.ElementwiseKernel使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類cupy的用法示例。
在下文中一共展示了cupy.ElementwiseKernel方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: backward_gpu
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def backward_gpu(self, inputs, gys):
    """Compute the gradients w.r.t. ``x``, ``gamma`` and ``beta`` on the GPU.

    Falls back to ``self.backward_cpu`` when ``self.gpu_optim`` is falsy.
    Otherwise the gradient for ``x`` is produced by a single fused
    elementwise kernel, while the ``gamma``/``beta`` gradients are plain
    reductions over axis 0.

    Args:
        inputs: Tuple ``(x, gamma, beta)``.
        gys: One-element tuple holding the upstream gradient.

    Returns:
        Tuple ``(ga, g_gamma, g_beta)``.
    """
    if not self.gpu_optim:
        return self.backward_cpu(inputs, gys)

    xp = cuda.get_array_module(*inputs)
    x, gamma, beta = inputs
    (gy,) = gys

    # Parameter gradients: reductions over axis 0.
    g_beta = xp.sum(gy, axis=0, keepdims=True)
    g_gamma = xp.sum(gy * self.normalized, axis=0, keepdims=True)

    # Scale by gamma, then remove the per-row mean (axis 1).
    scaled_gy = gy * gamma
    gy_centered = scaled_gy - xp.mean(scaled_gy, axis=1, keepdims=True)
    sc_prod = xp.sum(gy_centered * self.normalized, axis=1, keepdims=True)

    H = x.shape[1]
    # The row width is baked into the kernel source as a float literal.
    kernel_source = (
        '\nz = inv_norm *(gy_centered - normalized * (sc_prod/%f));\n' % H
    )
    backprop_scale = cp.ElementwiseKernel(
        'T inv_norm, T gy_centered, T normalized, T sc_prod',
        'T z',
        kernel_source,
        'backprop_scale',
    )
    ga = backprop_scale(self.inv_norm, gy_centered, self.normalized, sc_prod)
    return ga, g_gamma, g_beta
示例2: _get_map_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _get_map_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                    integer_output=False):
    """Create the ``ElementwiseKernel`` for the coordinate-map variant.

    The kernel takes the raw input ``x`` and a raw ``coords`` array and
    writes the output ``y``. The kernel body and its name are produced by
    ``_generate_interp_custom`` using ``_get_coord_map`` as the coordinate
    function.

    Args:
        ndim, large_int, yshape, mode, cval, order, integer_output:
            Forwarded unchanged to ``_generate_interp_custom``.

    Returns:
        cupy.ElementwiseKernel: The constructed kernel.
    """
    in_params = 'raw X x, raw W coords'
    out_params = 'Y y'
    operation, name = _generate_interp_custom(
        coord_func=_get_coord_map,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        # Previously 'shift' (copy-pasted from _get_shift_kernel), which
        # labelled this kernel as a shift kernel. Each sibling builder uses
        # its own operation name, so use 'map' here.
        name='map',
        integer_output=integer_output,
    )
    return cupy.ElementwiseKernel(in_params, out_params, operation, name)
示例3: _get_shift_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _get_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                      integer_output=False):
    """Create the ``ElementwiseKernel`` for the shift variant.

    The kernel takes the raw input ``x`` and a raw ``shift`` array and writes
    the output ``y``. Its body and name come from
    ``_generate_interp_custom`` with ``_get_coord_shift`` as the coordinate
    function; all arguments of this function are forwarded to it unchanged.
    """
    generator_options = dict(
        coord_func=_get_coord_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='shift',
        integer_output=integer_output,
    )
    operation, name = _generate_interp_custom(**generator_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift', 'Y y', operation, name)
示例4: _get_zoom_shift_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _get_zoom_shift_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                           integer_output=False):
    """Create the ``ElementwiseKernel`` for the combined zoom-and-shift variant.

    The kernel takes the raw input ``x`` plus raw ``shift`` and ``zoom``
    arrays and writes the output ``y``. Its body and name come from
    ``_generate_interp_custom`` with ``_get_coord_zoom_and_shift`` as the
    coordinate function; all arguments of this function are forwarded to it
    unchanged.
    """
    generator_options = dict(
        coord_func=_get_coord_zoom_and_shift,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom_shift',
        integer_output=integer_output,
    )
    operation, name = _generate_interp_custom(**generator_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W shift, raw W zoom', 'Y y', operation, name)
示例5: _get_zoom_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _get_zoom_kernel(ndim, large_int, yshape, mode, cval=0.0, order=1,
                     integer_output=False):
    """Create the ``ElementwiseKernel`` for the zoom variant.

    The kernel takes the raw input ``x`` and a raw ``zoom`` array and writes
    the output ``y``. Its body and name come from
    ``_generate_interp_custom`` with ``_get_coord_zoom`` as the coordinate
    function; all arguments of this function are forwarded to it unchanged.
    """
    generator_options = dict(
        coord_func=_get_coord_zoom,
        ndim=ndim,
        large_int=large_int,
        yshape=yshape,
        mode=mode,
        cval=cval,
        order=order,
        name='zoom',
        integer_output=integer_output,
    )
    operation, name = _generate_interp_custom(**generator_options)
    return cupy.ElementwiseKernel(
        'raw X x, raw W zoom', 'Y y', operation, name)
示例6: _kernel_finalize
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _kernel_finalize():
    """Build the kernel that rewrites the values of ``y`` in place.

    For every index ``i`` the kernel sets negative ``y[i]`` to 0. Any other
    value is looked up in ``labels[0:maxlabel]`` with a binary search and
    replaced by the 1-based position at which the search stops.

    NOTE(review): the binary search presumably relies on ``labels`` being
    sorted in ascending order -- confirm against the caller.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    # Kernel source kept verbatim.
    operation = '''
if (y[i] < 0) {
y[i] = 0;
continue;
}
int yi = y[i];
int j_min = 0;
int j_max = maxlabel - 1;
int j = (j_min + j_max) / 2;
while (j_min < j_max) {
if (yi == labels[j]) break;
if (yi < labels[j]) j_max = j - 1;
else j_min = j + 1;
j = (j_min + j_max) / 2;
}
y[i] = j + 1;
'''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_finalize')
示例7: test_manual_indexing
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def test_manual_indexing(self, n=100):
    """Check that implicit and manual (``raw``) indexing give equal sums.

    Two float32 vectors of length ``n`` are added once with a kernel using
    implicit per-element indexing and once with a kernel that indexes its
    ``raw`` arguments by ``i``; the second call passes ``size=n``
    explicitly. Both results must match.
    """
    lhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)
    rhs = cupy.random.uniform(-1, 1, n).astype(cupy.float32)

    # Implicit indexing: the kernel body sees one element per argument.
    implicit_kernel = cupy.ElementwiseKernel(
        'T x, T y', 'T z', '\nz = x + y;\n', 'uesr_kernel_1')
    out1 = implicit_kernel(lhs, rhs)

    # Manual indexing: all arguments are raw and indexed with ``i``.
    manual_kernel = cupy.ElementwiseKernel(
        'raw T x, raw T y', 'raw T z', '\nz[i] = x[i] + y[i];\n',
        'uesr_kernel_2')
    out2 = manual_kernel(lhs, rhs, size=n)

    testing.assert_array_equal(out1, out2)
示例8: test_python_scalar
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def test_python_scalar(self):
    """Check that a Python scalar can be passed as a kernel argument.

    For each of ``int``, ``float`` and ``bool`` an array of that type is
    added to the Python scalar ``typ(2)`` by an elementwise kernel, and the
    result is compared with the same addition done by NumPy on the host.
    """
    for typ in (int, float, bool):
        dtype = numpy.dtype(typ).type
        # The upper bound of randint is exclusive: the previous
        # ``randint(0, 1, ...)`` always produced an all-zero input, so the
        # test never exercised a non-zero array element. Draw from {0, 1}.
        in1_cpu = numpy.random.randint(0, 2, (4, 5)).astype(dtype)
        in1 = cupy.array(in1_cpu)
        scalar_value = typ(2)
        uesr_kernel_1 = cupy.ElementwiseKernel(
            'T x, T y',
            'T z',
            '''
            z = x + y;
            ''',
            'uesr_kernel_1')
        out1 = uesr_kernel_1(in1, scalar_value)
        expected = in1_cpu + dtype(2)
        testing.assert_array_equal(out1, expected)
示例9: __init__
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def __init__(self):
    """Create the ``unpack`` kernel and store it as ``self.unpack_kernel``.

    For flat output index ``i`` the kernel derives a column
    ``x = i % matrix_size`` and a row ``y = i / matrix_size``, swaps them
    when ``x < y`` so that ``y <= x``, and copies
    ``vec[matrix_size * y - y * (y + 1) / 2 + x]`` into ``mat[i]``. This
    reads ``vec`` as a row-major packed upper triangle and mirrors it into
    both halves of ``mat``.
    """
    in_params = 'raw T vec, int32 matrix_size'
    out_params = 'raw T mat'
    # Kernel source kept verbatim.
    operation = """
int x = i % matrix_size;
int y = i / matrix_size;
if( x < y ) {
int tmp = y;
y = x;
x = tmp;
}
mat[i] = vec[matrix_size * y - y * (y + 1) / 2 + x];
"""
    self.unpack_kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, 'unpack')
示例10: execute
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def execute(cls, ctx, op):
    """Run ``op`` as a single elementwise kernel and store the result.

    The kernel constructor arguments come from ``_evaluate`` applied to the
    first output chunk. The kernel is called with the values stored in
    ``ctx`` for each of ``op.inputs`` (looked up by ``key``), and its result
    is written to ``ctx`` under the output chunk's key.
    """
    import cupy as cp

    out_chunk = op.outputs[0]
    kernel = cp.ElementwiseKernel(*_evaluate(out_chunk))
    input_values = [ctx[inp.key] for inp in op.inputs]
    ctx[out_chunk.key] = kernel(*input_values)
示例11: test_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def test_kernel(self):
    """Check that a float32 kernel broadcasts (2, 3) with (3,) to (2, 3).

    The kernel multiplies the operands where ``x - 2 > y`` and adds them
    otherwise; only the shape of the result is asserted.
    """
    import cupy as cp

    x = cp.arange(6, dtype='f').reshape(2, 3)
    y = cp.arange(3, dtype='f')

    # Kernel source kept verbatim.
    operation = '''if (x - 2 > y) {
z = x * y;
} else {
z = x + y;
}'''
    kernel = cp.ElementwiseKernel(
        'float32 x, float32 y', 'float32 z', operation, 'my_kernel')

    r = kernel(x, y)
    self.assertEqual((2, 3), r.shape)
示例12: col2im_gpu
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def col2im_gpu(col, sy, sx, ph, pw, h, w, dy=1, dx=1):
    """Accumulate a 6-D column array back into an ``(n, c, h, w)`` image.

    ``col.shape`` is unpacked as ``(n, c, kh, kw, out_h, out_w)``. For every
    output pixel the kernel walks all kernel offsets ``(ky, kx)``, keeps
    those whose source position ``(y + ph - ky * dy, x + pw - kx * dx)`` is
    in range and divisible by the strides ``(sy, sx)``, and sums the
    matching entries of ``col``.

    Args:
        col: Column array of shape ``(n, c, kh, kw, out_h, out_w)``.
        sy, sx: Strides along y and x.
        ph, pw: Padding along y and x.
        h, w: Height and width of the returned image.
        dy, dx: Dilation along y and x.

    Returns:
        Array of shape ``(n, c, h, w)`` with the dtype of ``col``.
    """
    n, c, kh, kw, out_h, out_w = col.shape
    img = cp.empty((n, c, h, w), dtype=col.dtype)

    in_params = (
        'raw T col, int32 h, int32 w, int32 out_h, int32 out_w,'
        'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw,'
        'int32 dx, int32 dy'
    )
    # Kernel source kept verbatim.
    operation = '''
int c0 = i / (h * w);
int y = i / w % h;
int x = i % w;
T val = 0;
for (int ky = 0; ky < kh; ++ky) {
int out_y = (y + ph - ky * dy);
if (0 > out_y || out_y >= out_h * sy) continue;
if (out_y % sy != 0) continue;
out_y /= sy;
for (int kx = 0; kx < kw; ++kx) {
int out_x = (x + pw - kx * dx);
if (0 > out_x || out_x >= out_w * sx) continue;
if (out_x % sx != 0) continue;
out_x /= sx;
int k = out_y + out_h * (kx + kw * (ky + kh * c0));
val = val + col[out_x + out_w * k];
}
}
img = val;
'''
    col2im_kernel = cp.ElementwiseKernel(
        in_params, 'T img', operation, 'col2im')
    # ``img`` is passed last as the output array the kernel writes into.
    col2im_kernel(
        col.reduced_view(),
        h, w, out_h, out_w, kh, kw, sy, sx, ph, pw, dx, dy,
        img,
    )
    return img
示例13: elementwise
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def elementwise(in_params, out_params, operation, name, **kwargs):
    """Create an elementwise kernel function.

    The arguments match those of :class:`cupy.ElementwiseKernel`, except
    that ``name`` is required here. ``check_cuda_available`` is called
    before the kernel object is constructed.

    The kernel object is meant to be cached per argument combination and
    CUDA device through :func:`~chainer.backends.cuda.memoize`.
    NOTE(review): the caching decorator is not part of this excerpt --
    confirm it is applied where this function is defined.

    Returns:
        cupy.ElementwiseKernel: The constructed kernel.
    """
    check_cuda_available()
    kernel = cupy.ElementwiseKernel(
        in_params, out_params, operation, name, **kwargs)
    return kernel
示例14: get_label_lengths
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def get_label_lengths(self, labels):
    """Return, for each row of ``labels``, the index of its first blank symbol.

    A row that contains no ``self.blank_symbol`` is reported with its full
    width. Previously such a row yielded 0 on the NumPy path and was never
    written by the GPU kernel, leaving whatever the output buffer held.

    Args:
        labels: 2-D label container exposing ``.data`` (the underlying
            array), ``len()`` and, on the GPU path, ``.shape``.

    Returns:
        Array with one length per row: created with ``xp.zeros`` on the
        NumPy path, and the kernel output (declared with the dtype of
        ``labels.data``) on the GPU path.
    """
    if self.xp == numpy:
        rows = labels.data
        label_lengths = self.xp.zeros(len(labels))
        for i in range(len(labels)):
            row = rows[i]
            # Default to the full row width; overwritten at the first blank.
            label_lengths[i] = len(row)
            for j, label_value in enumerate(row):
                if label_value == self.blank_symbol:
                    label_lengths[i] = j
                    break
    else:
        import cupy
        label_length_kernel = cupy.ElementwiseKernel(
            'raw T labels, int32 blank_symbol, int32 num_labels',
            'T length',
            '''
            // Default to the full row width so that the output is always
            // written, even when the row holds no blank symbol.
            length = num_labels;
            for (int j = 0; j < num_labels; ++j) {
                T label_value = labels[i * num_labels + j];
                if (label_value == blank_symbol) {
                    length = j;
                    break;
                }
            }
            ''',
            'get_label_lengths'
        )
        # ``labels`` is raw, so the launch size (one thread per row) must be
        # given explicitly.
        label_lengths = label_length_kernel(
            labels.data, self.blank_symbol, labels.shape[1],
            size=len(labels))
    return label_lengths
示例15: _call_kernel
# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import ElementwiseKernel [as 別名]
def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=cupy.float64, structure_dtype=cupy.float64):
    """Invoke a constructed ElementwiseKernel on an image.

    The kernel must accept, in this order: the input image, an optional
    weights array, an optional structure array, and the output array.

    ``weights`` and ``structure`` may be ``None``, in which case they are
    not passed to the kernel at all. When given, each is made contiguous
    and converted to ``weights_dtype`` / ``structure_dtype`` (float64 by
    default, to stay compatible with SciPy; pass the array's own dtype to
    avoid any conversion). The input and output are never converted.

    The output is obtained through ``_get_output``, so ``output`` may be
    anything that helper accepts. If the output may overlap the input in
    memory, the kernel writes into a temporary array whose contents are
    copied into the requested output afterwards.

    Returns:
        The output array holding the kernel result.
    """
    kernel_args = [input]

    # Optional operands, appended in the order the kernel expects them.
    optional_operands = (
        (weights, weights_dtype),
        (structure, structure_dtype),
    )
    for operand, operand_dtype in optional_operands:
        if operand is not None:
            kernel_args.append(cupy.ascontiguousarray(operand, operand_dtype))

    output = _get_output(output, input)
    overlaps = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if overlaps:
        # Compute into a scratch array so the input is not overwritten
        # while it is still being read.
        requested_output = output
        output = _get_output(requested_output.dtype, input)

    kernel_args.append(output)
    kernel(*kernel_args)

    if not overlaps:
        return output
    requested_output[...] = output[...]
    return requested_output