Python cupy.RawKernel方法代碼示例

本文整理匯總了Python中cupy.RawKernel方法的典型用法代碼示例。如果您正苦於以下問題：Python cupy.RawKernel方法的具體用法？Python cupy.RawKernel怎麼用？Python cupy.RawKernel使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊cupy的用法示例。

在下文中一共展示了cupy.RawKernel方法的8個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _call_nms_kernel

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def _call_nms_kernel(bbox, thresh):
    assert False, "Not supported."
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec

開發者ID:pfnet-research，項目名稱:chainer-compiler，代碼行數:20，代碼來源:non_maximum_suppression.py

示例2: raw

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def raw(code, name, *args, **kwargs):
    """Build a :class:`cupy.RawKernel` after checking CUDA availability.

    ``check_cuda_available()`` is called first; every argument is then
    forwarded unchanged to :class:`cupy.RawKernel`, so the accepted
    arguments are exactly those of that class.

    NOTE(review): the upstream description states that the resulting
    kernel object is cached per argument combination and CUDA device via
    :func:`~chainer.backends.cuda.memoize`. No such decorator is visible
    on this definition -- confirm it is applied where this function lives.

    """
    check_cuda_available()
    kernel = cupy.RawKernel(code, name, *args, **kwargs)
    return kernel


# ------------------------------------------------------------------------------
# numpy/cupy compatible coding
# ------------------------------------------------------------------------------

開發者ID:chainer，項目名稱:chainer，代碼行數:19，代碼來源:cuda.py

示例3: setUp

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def setUp(self):
        """Prepare the compile cache directory and the raw kernel/modules.

        Records the current device id, points ``CUPY_CACHE_DIR`` at a fresh
        temporary directory (also stored in the module-level
        ``_test_cache_dir``), and builds the kernel and modules used by the
        tests with ``self.backend``.
        """
        global _test_cache_dir

        self.dev = cupy.cuda.runtime.getDevice()
        # Device 1 must not already be current when the fixture starts.
        assert self.dev != 1

        # Use a throwaway cache directory for this test.
        _test_cache_dir = tempfile.mkdtemp()
        os.environ['CUPY_CACHE_DIR'] = _test_cache_dir

        self.kern = cupy.RawKernel(_test_source1, 'test_sum',
                                   backend=self.backend)
        self.mod2 = cupy.RawModule(code=_test_source2, backend=self.backend)
        self.mod3 = cupy.RawModule(code=_test_source3,
                                   options=('-DPRECISION=2',),
                                   backend=self.backend)

開發者ID:cupy，項目名稱:cupy，代碼行數:20，代碼來源:test_raw.py

示例4: _call_nms_kernel

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def _call_nms_kernel(bbox, thresh):
    """Launch the ``nms_kernel`` raw CUDA kernel and post-process its mask.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is taken as the number of
            boxes (presumably an ``(n, 4)`` layout -- TODO confirm). It is
            converted to a C-contiguous float32 device array before launch.
        thresh: Threshold forwarded to the kernel as a float32 scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    threads_per_block = 64
    n_bbox = bbox.shape[0]
    # Number of thread blocks needed along each grid axis.
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    grid_dim = (col_blocks, col_blocks, 1)
    block_dim = (threads_per_block, 1, 1)

    # Output buffer of 64-bit words: ``col_blocks`` words per box.
    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    boxes_dev = cp.ascontiguousarray(bbox, dtype=np.float32)

    nms_kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(n_bbox), cp.float32(thresh), boxes_dev, mask_dev)
    nms_kernel(grid_dim, block_dim, args=kernel_args)

    # Copy the mask back to the host for post-processing.
    mask_host = mask_dev.get()
    return _nms_gpu_post(mask_host, n_bbox, threads_per_block, col_blocks)

開發者ID:chainer，項目名稱:chainercv，代碼行數:19，代碼來源:non_maximum_suppression.py

示例5: sgemm

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def sgemm(A, B,
          dim_x=16, dim_y=16, blk_m=64, blk_n=64, blk_k=4,
          dim_xa=64, dim_ya=4, dim_xb=4, dim_yb=64):
    """Run the ``sgemm`` raw kernel on two float32 matrices.

    The kernel source is loaded through ``read_code(sgemm_file, ...)`` with
    the tiling parameters substituted, then launched on Fortran-ordered
    copies of ``A`` and ``B``. Presumably the kernel computes the matrix
    product ``A @ B`` -- confirm against the kernel source.

    Args:
        A: 2-D float32 array of shape ``(m, k)``.
        B: 2-D float32 array of shape ``(k, n)``.
        dim_x, dim_y: Thread-block dimensions used for the launch.
        blk_m, blk_n, blk_k: Tile sizes passed to the kernel source and used
            to size the grid and the shared-memory request.
        dim_xa, dim_ya, dim_xb, dim_yb: Alternative thread layouts passed to
            the kernel source; each product must equal ``dim_x * dim_y``.

    Returns:
        The ``(m, n)`` float32 Fortran-order array written by the kernel.

    Raises:
        AssertionError: If a dtype is not float32 or the thread layouts
            disagree on the number of threads.
        ValueError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    assert A.dtype == cp.float32
    assert B.dtype == cp.float32
    assert(dim_x * dim_y == dim_xa * dim_ya == dim_xb * dim_yb)

    m, k = A.shape
    k_b, n = B.shape
    # Previously B's row count silently replaced A's column count, so a
    # shape mismatch launched the kernel with an inconsistent ``k``.
    if k != k_b:
        raise ValueError(
            'inner dimensions of A and B must match: '
            'A has shape {}, B has shape {}'.format(A.shape, B.shape))

    # Inputs matrices need to be in Fortran order.
    A = cp.asfortranarray(A)
    B = cp.asfortranarray(B)

    C = cp.empty((m, n), dtype=cp.float32, order='F')

    config = {'DIM_X': dim_x, 'DIM_Y': dim_y,
              'BLK_M': blk_m, 'BLK_N': blk_n, 'BLK_K': blk_k,
              'DIM_XA': dim_xa, 'DIM_YA': dim_ya,
              'DIM_XB': dim_xb, 'DIM_YB': dim_yb,
              'THR_M': blk_m // dim_x, 'THR_N': blk_n // dim_y}
    code = read_code(sgemm_file, params=config)
    kern = cp.RawKernel(code, 'sgemm')

    # One thread block per (blk_m x blk_n) tile of the output, rounded up.
    grid = (int(math.ceil(m / blk_m)), int(math.ceil(n / blk_n)), 1)
    block = (dim_x, dim_y, 1)
    args = (m, n, k, A, B, C)
    # Dynamic shared memory request; the factor 4 is presumably
    # sizeof(float) -- confirm against the kernel source.
    shared_mem = blk_k * (blk_m + 1) * 4 + blk_n * (blk_k + 1) * 4
    kern(grid, block, args=args, shared_mem=shared_mem)
    return C

開發者ID:cupy，項目名稱:cupy，代碼行數:32，代碼來源:sgemm.py

示例6: test_dynamical_parallelism

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def test_dynamical_parallelism(self):
        """Launch ``test_kernel`` built with ``-dc`` and expect all ones."""
        total = 169
        chunk = 13
        out = cupy.zeros((total,), dtype=cupy.float32)

        kernel = cupy.RawKernel(
            _test_source4, 'test_kernel',
            options=('-dc',),
            backend=self.backend)

        # A single block with one thread per chunk of the output.
        grid = (1,)
        block = (total // chunk,)
        kernel(grid, block, (out, total, chunk))

        assert (out == 1.0).all()

開發者ID:cupy，項目名稱:cupy，代碼行數:10，代碼來源:test_raw.py

示例7: test_dynamical_parallelism_compile_failure

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def test_dynamical_parallelism_compile_failure(self):
        """Expect an error when ``test_kernel`` is built without ``-dc``."""
        # Omitting the separate-compilation option should cause an error.
        kernel = cupy.RawKernel(
            _test_source4, 'test_kernel', backend=self.backend)
        total = 10
        chunk = 2
        out = cupy.zeros((total,), dtype=cupy.float32)

        if self.backend == 'nvrtc':
            # raised when calling ls.complete()
            expected_error = cupy.cuda.driver.CUDADriverError
        else:  # nvcc
            expected_error = cupy.cuda.compiler.CompileException

        with pytest.raises(expected_error):
            kernel((1,), (total // chunk,), (out, total, chunk))

開發者ID:cupy，項目名稱:cupy，代碼行數:16，代碼來源:test_raw.py

示例8: test_context_switch_RawKernel

# 需要導入模塊: import cupy [as 別名]
# 或者: from cupy import RawKernel [as 別名]
def test_context_switch_RawKernel(self):
        """Run the kernel helper on the current device, then on device 1."""
        dtype = cupy.float32

        # A RawKernel has to be launched once to force compilation, so the
        # first call's results are not checked.
        _lhs, _rhs, _out = self._helper(self.kern, dtype)

        # Repeat the basic check with device 1 made current.
        with cupy.cuda.Device(1):
            lhs, rhs, out = self._helper(self.kern, dtype)
            assert cupy.allclose(out, lhs + rhs)

開發者ID:cupy，項目名稱:cupy，代碼行數:11，代碼來源:test_raw.py

注：本文中的cupy.RawKernel方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。