本文整理汇总了Python中cupy.ascontiguousarray方法的典型用法代码示例。如果您正苦于以下问题:Python cupy.ascontiguousarray方法的具体用法?Python cupy.ascontiguousarray怎么用?Python cupy.ascontiguousarray使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类cupy的用法示例。
在下文中一共展示了cupy.ascontiguousarray方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _call_nms_kernel(bbox, thresh):
assert False, "Not supported."
n_bbox = bbox.shape[0]
threads_per_block = 64
col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
blocks = (col_blocks, col_blocks, 1)
threads = (threads_per_block, 1, 1)
mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
bbox, mask_dev))
mask_host = mask_dev.get()
selection, n_selec = _nms_gpu_post(
mask_host, n_bbox, threads_per_block, col_blocks)
return selection, n_selec
示例2: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _call_nms_kernel(bbox, thresh):
    """Launch ``nms_kernel`` on ``bbox`` and post-process the mask.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is taken as the box count.
            It is converted to a C-contiguous float32 CuPy array.
        thresh: Threshold forwarded to the kernel as a float32 scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    # PyTorch does not support unsigned long tensors. That does not matter
    # here because an ndarray is returned in the end, so the CuPy code path
    # is left unmodified.
    box_count = bbox.shape[0]
    block_threads = 64
    n_col_blocks = np.ceil(box_count / block_threads).astype(np.int32)

    # One uint64 word per (box, column block) pair, filled by the kernel.
    mask_dev = cp.zeros((box_count * n_col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)

    kern = _load_kernel('nms_kernel', _nms_gpu_code)
    grid = (n_col_blocks, n_col_blocks, 1)
    block = (block_threads, 1, 1)
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, mask_dev)
    kern(grid, block, args=kernel_args)

    # Copy the mask to host memory for the post-processing step.
    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, box_count, block_threads, n_col_blocks)
    return selection, n_selec
示例3: _call_nms_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _call_nms_kernel(bbox, thresh):
    """Compile and run the raw ``nms_kernel`` on the given boxes.

    Args:
        bbox: Array of boxes; its first dimension is the number of boxes.
            It is converted to a C-contiguous float32 CuPy array.
        thresh: Threshold handed to the kernel as a float32 scalar.

    Returns:
        Tuple ``(selection, n_selec)`` as returned by ``_nms_gpu_post``.
    """
    threads_per_block = 64
    total = bbox.shape[0]
    col_blocks = np.ceil(total / threads_per_block).astype(np.int32)

    # Launch configuration: a square grid of column blocks, 1-D blocks.
    grid_dim = (col_blocks, col_blocks, 1)
    block_dim = (threads_per_block, 1, 1)

    device_mask = cp.zeros((total * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    nms(
        grid_dim,
        block_dim,
        args=(cp.int32(total), cp.float32(thresh), bbox, device_mask),
    )

    host_mask = device_mask.get()
    selection, n_selec = _nms_gpu_post(
        host_mask, total, threads_per_block, col_blocks)
    return selection, n_selec
示例4: _voxelize_sub2
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _voxelize_sub2(faces, size):
    """Mark the voxels hit by face vertices in an int32 occupancy grid.

    The elementwise kernel runs once per index ``j`` in ``bs * nf``,
    splits it into a batch index and a face index, reads the nine values
    of that face as three ``(y, x, z)`` triples truncated to ``int``, and
    writes ``1`` into ``voxels`` for every triple inside ``[0, size)``.

    Args:
        faces: Array whose first two dimensions are ``(bs, nf)``; made
            C-contiguous before the launch. The kernel reads it through a
            ``float*`` -- presumably float32, TODO confirm against caller.
        size: Edge length of the cubic voxel grid.

    Returns:
        A ``(bs, size, size, size)`` int32 CuPy array of zeros and ones.
    """
    bs, nf = faces.shape[:2]
    faces = cp.ascontiguousarray(faces)
    voxels = cp.zeros((bs, size, size, size), 'int32')

    # The kernel source is kept flush-left so the string is unchanged.
    kernel = chainer.cuda.elementwise(
        'int32 j, raw T faces, raw int32 bs, raw int32 nf, raw int32 vs',
        'raw int32 voxels',
        '''
int fn = j % nf;
int bn = j / nf;
float* face = &faces[(bn * nf + fn) * 9];
for (int k = 0; k < 3; k++) {
int yi = face[3 * k + 0];
int xi = face[3 * k + 1];
int zi = face[3 * k + 2];
if ((0 <= yi) && (yi < vs) && (0 <= xi) && (xi < vs) && (0 <= zi) && (zi < vs))
voxels[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] = 1;
}
''',
        'function',
    )
    face_indices = cp.arange(bs * nf).astype('int32')
    kernel(face_indices, faces, bs, nf, size, voxels)
    return voxels
示例5: get_pointer
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def get_pointer(v):
    """Return a ``ctypes.c_void_p`` to the device memory of ``v``.

    Args:
        v: Array-like object exposing ``flags.c_contiguous`` and
            ``data.ptr``.

    Returns:
        If ``v`` is already C-contiguous, just the pointer. Otherwise a
        ``(pointer, copy)`` tuple, where ``copy`` is the C-contiguous copy
        the pointer refers to. Note that the two cases return different
        shapes of result.
    """
    def as_void_p(arr):
        return ctypes.c_void_p(arr.data.ptr)

    if v.flags.c_contiguous:
        return as_void_p(v)

    contiguous = cp.ascontiguousarray(v)
    return as_void_p(contiguous), contiguous
示例6: _check_size_footprint_structure
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _check_size_footprint_structure(ndim, size, footprint, structure,
stacklevel=3, force_footprint=False):
if structure is None and footprint is None:
if size is None:
raise RuntimeError("no footprint or filter size provided")
sizes = _fix_sequence_arg(size, ndim, 'size', int)
if force_footprint:
return None, cupy.ones(sizes, bool), None
return sizes, None, None
if size is not None:
warnings.warn("ignoring size because {} is set".format(
'structure' if footprint is None else 'footprint'),
UserWarning, stacklevel=stacklevel+1)
if footprint is not None:
footprint = cupy.array(footprint, bool, True, 'C')
if not footprint.any():
raise ValueError("all-zero footprint is not supported")
if structure is None:
if not force_footprint and footprint.all():
return footprint.shape, None, None
return None, footprint, None
structure = cupy.ascontiguousarray(structure)
if footprint is None:
footprint = cupy.ones(structure.shape, bool)
return None, footprint, structure
示例7: _call_kernel
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _call_kernel(kernel, input, weights, output, structure=None,
                 weights_dtype=cupy.float64, structure_dtype=cupy.float64):
    """
    Invoke a constructed ElementwiseKernel on an input image.

    The kernel is called with, in order: the input, the weights (if not
    None), the structure (if not None) and the output array.

    weights and structure are made contiguous and cast to weights_dtype
    and structure_dtype respectively (float64 by default, or bool for
    weights that are footprints)*. The output is obtained through
    _get_output, so it may be given as None and allocated here. If the
    output may overlap the input in memory, the kernel writes to a
    temporary array whose contents are then copied into the requested
    output.

    * weights is always cast to float64 or bool in order to get an output
      compatible with SciPy, though float32 might be sufficient when the
      input dtype is low precision. If weights_dtype is passed as
      weights.dtype then no dtype conversion will occur. The input and
      output are never converted.
    """
    kernel_args = [input]
    optional = ((weights, weights_dtype), (structure, structure_dtype))
    for array, dtype in optional:
        if array is not None:
            kernel_args.append(cupy.ascontiguousarray(array, dtype))

    output = _get_output(output, input)
    overlaps = cupy.shares_memory(output, input, 'MAY_SHARE_BOUNDS')
    if overlaps:
        # Write into a scratch array so the kernel never reads memory it
        # has already overwritten.
        requested = output
        output = _get_output(requested.dtype, input)

    kernel_args.append(output)
    kernel(*kernel_args)

    if not overlaps:
        return output
    requested[...] = output[...]
    return requested
示例8: test_ascontiguousarray_on_noncontiguous_array
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def test_ascontiguousarray_on_noncontiguous_array(self):
    """ascontiguousarray of a transposed array is C-contiguous and equal."""
    base = testing.shaped_arange((2, 3, 4))
    transposed = base.transpose(2, 0, 1)
    result = cupy.ascontiguousarray(transposed)
    assert result.flags.c_contiguous
    testing.assert_array_equal(transposed, result)
示例9: test_ascontiguousarray_on_contiguous_array
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def test_ascontiguousarray_on_contiguous_array(self):
    """ascontiguousarray returns the very same object when given `a`."""
    original = testing.shaped_arange((2, 3, 4))
    result = cupy.ascontiguousarray(original)
    assert original is result
示例10: test_asarray_cuda_array_zero_dim
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def test_asarray_cuda_array_zero_dim(self, xp):
    """Return ``xp.ascontiguousarray`` applied to a zero-dim array of ones.

    ``xp`` is the array module supplied by the test harness; the returned
    array is what the harness inspects.
    """
    return xp.ascontiguousarray(xp.ones(()))
示例11: _fftn
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def _fftn(a, s, axes, norm, direction, value_type='C2C', order='A', plan=None,
overwrite_x=False, out=None):
if norm not in (None, 'ortho'):
raise ValueError('Invalid norm value %s, should be None or "ortho".'
% norm)
axes, axes_sorted = _prep_fftn_axes(a.ndim, s, axes, value_type)
if not axes_sorted:
if value_type == 'C2C':
return a
else:
raise IndexError('list index out of range')
a = _convert_dtype(a, value_type)
if order == 'A':
if a.flags.f_contiguous:
order = 'F'
elif a.flags.c_contiguous:
order = 'C'
else:
a = cupy.ascontiguousarray(a)
order = 'C'
elif order not in ['C', 'F']:
raise ValueError('Unsupported order: {}'.format(order))
# Note: need to call _cook_shape prior to sorting the axes
a = _cook_shape(a, s, axes, value_type, order=order)
if order == 'C' and not a.flags.c_contiguous:
a = cupy.ascontiguousarray(a)
elif order == 'F' and not a.flags.f_contiguous:
a = cupy.asfortranarray(a)
# _cook_shape tells us input shape only, and not output shape
out_size = _get_fftn_out_size(a.shape, s, axes_sorted[-1], value_type)
a = _exec_fftn(a, direction, value_type, norm=norm, axes=axes_sorted,
overwrite_x=overwrite_x, plan=plan, out=out,
out_size=out_size)
return a
示例12: reduced_binary_einsum
# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import ascontiguousarray [as 别名]
def reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
    """Contract two already-reduced einsum operands.

    Args:
        arr0: First operand.
        sub0: Subscript labels of ``arr0``; must not repeat a label.
        arr1: Second operand.
        sub1: Subscript labels of ``arr1``; must not repeat a label.
        sub_others: Labels that must survive this step. Labels shared by
            both operands and present here are batch dimensions; shared
            labels absent from here are contracted.

    Returns:
        Tuple ``(arr_out, sub_out)``: the resulting array and its labels.

    Three strategies are used, in this order: an elementwise product when
    either operand has no labels or nothing is contracted, cuTENSOR when
    ``_use_cutensor`` allows it, and a batched ``cupy.matmul`` otherwise.
    """
    labels0 = set(sub0)
    labels1 = set(sub1)
    assert len(labels0) == len(sub0), 'operand 0 should be reduced: diagonal'
    assert len(labels1) == len(sub1), 'operand 1 should be reduced: diagonal'

    # A label-free operand is handled by a plain product.
    if len(sub0) == 0 or len(sub1) == 0:
        return arr0 * arr1, sub0 + sub1

    labels_others = set(sub_others)
    common = labels0 & labels1
    batch_dims = common & labels_others
    contract_dims = common - batch_dims

    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)

    sub_b = [sub0[axis] for axis in bs0]
    assert sub_b == [sub1[axis] for axis in bs1]
    sub_l = [sub0[axis] for axis in ts0]
    sub_r = [sub1[axis] for axis in ts1]
    sub_out = sub_b + sub_l + sub_r
    assert set(sub_out) <= labels_others, \
        'operands should be reduced: unary sum'

    # When the output has as many labels as ``sub_others``, emitting them
    # in that order assures the final einsum output is C-contiguous.
    matches_final = len(sub_out) == len(sub_others)

    if not contract_dims:
        # Use element-wise multiply when no contraction is needed.
        if matches_final:
            sub_out = sub_others
        lhs = _expand_dims_transpose(arr0, sub0, sub_out)
        rhs = _expand_dims_transpose(arr1, sub1, sub_out)
        return lhs * rhs, sub_out

    if _use_cutensor(arr0.dtype, sub0, arr1.dtype, sub1,
                     batch_dims, contract_dims):
        if matches_final:
            sub_out = sub_others
        out_shape = _get_out_shape(
            arr0.shape, sub0, arr1.shape, sub1, sub_out)
        arr_out = cupy.empty(out_shape, arr0.dtype)
        arr0 = cupy.ascontiguousarray(arr0)
        arr1 = cupy.ascontiguousarray(arr1)
        desc_0 = cutensor.create_tensor_descriptor(arr0)
        desc_1 = cutensor.create_tensor_descriptor(arr1)
        desc_out = cutensor.create_tensor_descriptor(arr_out)
        arr_out = cutensor.contraction(
            1.0,
            arr0, desc_0, sub0,
            arr1, desc_1, sub1,
            0.0,
            arr_out, desc_out, sub_out,
        )
        return arr_out, sub_out

    # Fallback: flatten to (batch, left, contract) @ (batch, contract,
    # right) and use matmul.
    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
    assert shapes0[0] == shapes1[0]
    product = cupy.matmul(tmp0, tmp1)
    return product.reshape(shapes_out), sub_out