当前位置: 首页>>代码示例>>Python>>正文


Python cupy.RawKernel方法代码示例

本文整理汇总了Python中cupy.RawKernel方法的典型用法代码示例。如果您正苦于以下问题:Python cupy.RawKernel方法的具体用法?Python cupy.RawKernel怎么用?Python cupy.RawKernel使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在cupy的用法示例。


在下文中一共展示了cupy.RawKernel方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _call_nms_kernel

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def _call_nms_kernel(bbox, thresh):
    assert False, "Not supported."
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec 
开发者ID:pfnet-research,项目名称:chainer-compiler,代码行数:20,代码来源:non_maximum_suppression.py

示例2: raw

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def raw(code, name, *args, **kwargs):
    """Creates a raw kernel function.

    This function uses :func:`~chainer.backends.cuda.memoize` to cache the
    resulting kernel object, i.e. the resulting kernel object is cached for
    each argument combination and CUDA device.

    The arguments are the same as those for :class:`cupy.RawKernel`.

    """
    check_cuda_available()
    return cupy.RawKernel(code, name, *args, **kwargs)


# ------------------------------------------------------------------------------
# numpy/cupy compatible coding
# ------------------------------------------------------------------------------ 
开发者ID:chainer,项目名称:chainer,代码行数:19,代码来源:cuda.py

示例3: setUp

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def setUp(self):
        self.dev = cupy.cuda.runtime.getDevice()
        assert self.dev != 1

        global _test_cache_dir
        _test_cache_dir = tempfile.mkdtemp()
        os.environ['CUPY_CACHE_DIR'] = _test_cache_dir

        self.kern = cupy.RawKernel(
            _test_source1, 'test_sum',
            backend=self.backend)
        self.mod2 = cupy.RawModule(
            code=_test_source2,
            backend=self.backend)
        self.mod3 = cupy.RawModule(
            code=_test_source3,
            options=('-DPRECISION=2',),
            backend=self.backend) 
开发者ID:cupy,项目名称:cupy,代码行数:20,代码来源:test_raw.py

示例4: _call_nms_kernel

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def _call_nms_kernel(bbox, thresh):
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec 
开发者ID:chainer,项目名称:chainercv,代码行数:19,代码来源:non_maximum_suppression.py

示例5: sgemm

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def sgemm(A, B,
          dim_x=16, dim_y=16, blk_m=64, blk_n=64, blk_k=4,
          dim_xa=64, dim_ya=4, dim_xb=4, dim_yb=64):
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert(dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k, n = B.shape

    # Inputs matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)

    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {'DIM_X': dim_x, 'DIM_Y': dim_y,
              'BLK_M': blk_m, 'BLK_N': blk_n, 'BLK_K': blk_k,
              'DIM_XA': dim_xa, 'DIM_YA': dim_ya,
              'DIM_XB': dim_xb, 'DIM_YB': dim_yb,
              'THR_M': blk_m // dim_x, 'THR_N': blk_n // dim_y}
    code = read_code(sgemm_file, params=config)
    kern = cp.RawKernel(code, 'sgemm')

    grid = (int(math.ceil(m / blk_m)), int(math.ceil(n / blk_n)), 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C 
开发者ID:cupy,项目名称:cupy,代码行数:32,代码来源:sgemm.py

示例6: test_dynamical_parallelism

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def test_dynamical_parallelism(self):
        ker = cupy.RawKernel(_test_source4, 'test_kernel', options=('-dc',),
                             backend=self.backend)
        N = 169
        inner_chunk = 13
        x = cupy.zeros((N,), dtype=cupy.float32)
        ker((1,), (N//inner_chunk,), (x, N, inner_chunk))
        assert (x == 1.0).all() 
开发者ID:cupy,项目名称:cupy,代码行数:10,代码来源:test_raw.py

示例7: test_dynamical_parallelism_compile_failure

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def test_dynamical_parallelism_compile_failure(self):
        # no option for separate compilation is given should cause an error
        ker = cupy.RawKernel(_test_source4, 'test_kernel',
                             backend=self.backend)
        N = 10
        inner_chunk = 2
        x = cupy.zeros((N,), dtype=cupy.float32)
        if self.backend == 'nvrtc':
            # raised when calling ls.complete()
            with pytest.raises(cupy.cuda.driver.CUDADriverError):
                ker((1,), (N//inner_chunk,), (x, N, inner_chunk))
        else:  # nvcc
            with pytest.raises(cupy.cuda.compiler.CompileException):
                ker((1,), (N//inner_chunk,), (x, N, inner_chunk)) 
开发者ID:cupy,项目名称:cupy,代码行数:16,代码来源:test_raw.py

示例8: test_context_switch_RawKernel

# 需要导入模块: import cupy [as 别名]
# 或者: from cupy import RawKernel [as 别名]
def test_context_switch_RawKernel(self):
        # run test_basic() on another device

        # For RawKernel, we need to launch it once to force compiling
        x1, x2, y = self._helper(self.kern, cupy.float32)

        with cupy.cuda.Device(1):
            x1, x2, y = self._helper(self.kern, cupy.float32)
            assert cupy.allclose(y, x1 + x2) 
开发者ID:cupy,项目名称:cupy,代码行数:11,代码来源:test_raw.py


注:本文中的cupy.RawKernel方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。