本文整理匯總了Python中numba.cuda.select_device方法的典型用法代碼示例。如果您正苦於以下問題：Python cuda.select_device方法的具體用法？Python cuda.select_device怎麼用？Python cuda.select_device使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊numba.cuda的用法示例。
在下文中一共展示了cuda.select_device方法的14個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute the pairwise rotated-box IoU matrix on the GPU (numba.cuda).

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Forwarded unchanged to
            ``rotate_iou_kernel_eval``, which defines its meaning.
        device_id (int, optional): Defaults to 0. Device index handed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: float32 array of shape [N, K]. The dtype is always
        float32, regardless of the dtype of ``boxes``. If N or K is 0 the
        (empty) zero array is returned without touching the GPU.
    """
    # Work on float32 copies; the caller's arrays are left untouched.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is defined elsewhere; presumably ceiling division -- confirm.
    blocks_per_grid = (div_up(N, threads_per_block),
                       div_up(K, threads_per_block))
    stream = cuda.stream()
    # Everything queued on the stream is awaited when the block exits.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # The flattened view shares memory with iou, so this fills iou.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # iou is already float32, so no final cast is needed.
    return iou
示例2: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Rotated-box IoU between every box and every query box, run on the GPU.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): second set of boxes; presumably
            the same layout as ``boxes`` -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Passed straight through
            to ``rotate_iou_kernel_eval``; see that kernel for its meaning.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: [N, K] float32 array. The result is float32 whatever
        dtype the inputs had. When N or K is 0 it is returned immediately
        and no CUDA call is made.
    """
    # astype returns new float32 arrays, so the inputs are not modified.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N, K = boxes.shape[0], query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up comes from elsewhere in the project (presumably ceil-divide).
    blocks_per_grid = (div_up(N, threads_per_block),
                       div_up(K, threads_per_block))
    stream = cuda.stream()
    # Leaving the context waits for all work queued on the stream.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # Copy into a flat view of iou so the 2-D array receives the data.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # No trailing cast: iou was allocated as float32 above.
    return iou
示例3: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """
    Compute rotated-box IoU for every (box, query box) pair on the GPU.
    Converted from [this project](https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    :param boxes: rbboxes, format: centers, dims, angles(clockwise when positive), float array [N, 5]
    :param query_boxes: float array [K, 5]; presumably the same layout as ``boxes`` (confirm against the kernel)
    :param criterion: optional, default: -1; forwarded unchanged to ``rotate_iou_kernel_eval``
    :param device_id: int, optional, default: 0; device index passed to ``cuda.select_device``
    :return: float32 array of shape [N, K]; always float32 regardless of the input dtype.
        If N or K is 0 the empty array is returned without any CUDA call.
    """
    # Convert to float32 copies; the caller's arrays stay unchanged.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is a project helper defined elsewhere (presumably ceiling division).
    blocks_per_grid = (div_up(N, threads_per_block), div_up(K, threads_per_block))
    stream = cuda.stream()
    # The context manager waits for the queued transfers and kernel on exit.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # Writing into the flat view fills the 2-D iou array itself.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # iou is float32 already; a cast to boxes.dtype (also float32) would be a no-op.
    return iou
示例4: no_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def no_gpu():
    """Report whether the GPU stack is unusable.

    Returns:
        bool: True if ``numba.cuda`` or ``cudf`` cannot be imported, or if
        selecting CUDA device 0 raises ``CudaDriverError`` or an
        ``ImportError``; False otherwise.
    """
    try:
        from numba import cuda
        import cudf  # noqa
    except ImportError:
        return True

    try:
        cuda.select_device(0)
    except (cuda.cudadrv.error.CudaDriverError, ImportError):
        # ImportError is kept here so that import-type failures raised while
        # selecting the device are still reported as "no GPU".
        return True
    return False
示例5: nms_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def nms_gpu(dets, nms_overlap_thresh, device_id=0):
    """Run non-maximum suppression on the GPU.

    Args:
        dets (np.ndarray): 2-D array with one detection per row. Column 4
            is used as the ranking score; the remaining columns are
            presumably box coordinates -- confirm against ``nms_kernel``.
        nms_overlap_thresh (float): Forwarded unchanged to ``nms_kernel``;
            presumably the overlap above which a box is suppressed.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        list: indices into ``dets`` of the detections that were kept, as
        selected by ``nms_postprocess``. An empty ``dets`` yields ``[]``
        without touching the GPU.
    """
    boxes_num = dets.shape[0]
    if boxes_num == 0:
        # Nothing to suppress; also avoids configuring a zero-sized grid
        # and zero-length device buffers.
        return []
    keep_out = np.zeros([boxes_num], dtype=np.int32)
    scores = dets[:, 4]
    # Highest score first.
    order = scores.argsort()[::-1].astype(np.int32)
    boxes_host = dets[order, :]
    threads_per_block = 8 * 8
    # div_up is defined elsewhere; presumably ceiling division -- confirm.
    col_blocks = div_up(boxes_num, threads_per_block)
    cuda.select_device(device_id)
    # col_blocks uint64 words per box, written by the kernel.
    mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64)
    blocks_per_grid = (col_blocks, col_blocks)
    stream = cuda.stream()
    # Work queued on the stream is awaited when the block exits.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream)
        mask_dev = cuda.to_device(mask_host, stream)
        nms_kernel[blocks_per_grid, threads_per_block, stream](
            boxes_num, nms_overlap_thresh, boxes_dev, mask_dev)
        mask_dev.copy_to_host(mask_host, stream=stream)
    # nms_postprocess fills keep_out and returns how many entries are valid.
    num_out = nms_postprocess(keep_out, mask_host, boxes_num)
    keep = keep_out[:num_out]
    # keep indexes the sorted boxes; map back to rows of the original dets.
    return list(order[keep])
示例6: rotate_nms_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0):
    """Run rotated-box non-maximum suppression on the GPU.

    WARNING (from the original author): this function can provide the right
    result, but its performance has not been tested.

    Args:
        dets (np.ndarray): 2-D array with one detection per row. Column 5
            is used as the ranking score; the remaining columns are
            presumably the rotated-box parameters -- confirm against
            ``rotate_nms_kernel``.
        nms_overlap_thresh (float): Forwarded unchanged to
            ``rotate_nms_kernel``; presumably the overlap above which a box
            is suppressed.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        list: indices into ``dets`` of the detections that were kept, as
        selected by ``nms_postprocess``. An empty ``dets`` yields ``[]``
        without touching the GPU.
    """
    boxes_num = dets.shape[0]
    if boxes_num == 0:
        # Nothing to suppress; also avoids configuring a zero-sized grid
        # and zero-length device buffers.
        return []
    # float32 copy; the caller's array is not modified.
    dets = dets.astype(np.float32)
    keep_out = np.zeros([boxes_num], dtype=np.int32)
    scores = dets[:, 5]
    # Highest score first.
    order = scores.argsort()[::-1].astype(np.int32)
    boxes_host = dets[order, :]
    threads_per_block = 8 * 8
    # div_up is defined elsewhere; presumably ceiling division -- confirm.
    col_blocks = div_up(boxes_num, threads_per_block)
    cuda.select_device(device_id)
    # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64)
    mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64)
    blocks_per_grid = (col_blocks, col_blocks)
    stream = cuda.stream()
    # Work queued on the stream is awaited when the block exits.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream)
        mask_dev = cuda.to_device(mask_host, stream)
        rotate_nms_kernel[blocks_per_grid, threads_per_block, stream](
            boxes_num, nms_overlap_thresh, boxes_dev, mask_dev)
        mask_dev.copy_to_host(mask_host, stream=stream)
    # nms_postprocess fills keep_out and returns how many entries are valid.
    num_out = nms_postprocess(keep_out, mask_host, boxes_num)
    keep = keep_out[:num_out]
    # keep indexes the sorted boxes; map back to rows of the original dets.
    return list(order[keep])
示例7: rotate_iou_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu(boxes, query_boxes, device_id=0):
    """Compute the pairwise rotated-box IoU matrix on the GPU (numba.cuda).

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        device_id (int, optional): Defaults to 0. Device index handed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: float32 array of shape [N, K]. The dtype is always
        float32, regardless of the dtype of ``boxes``. If N or K is 0 the
        (empty) zero array is returned without touching the GPU.
    """
    # Work on float32 copies; the caller's arrays are left untouched.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is defined elsewhere; presumably ceiling division -- confirm.
    blocks_per_grid = (div_up(N, threads_per_block),
                       div_up(K, threads_per_block))
    stream = cuda.stream()
    # Everything queued on the stream is awaited when the block exits.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev)
        # The flattened view shares memory with iou, so this fills iou.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # iou is already float32, so no final cast is needed.
    return iou
示例8: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Evaluate rotated-box IoU for all box/query pairs on the GPU.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Handed to
            ``rotate_iou_kernel_eval`` as-is; that kernel defines what the
            value selects.
        device_id (int, optional): Defaults to 0. Device index given to
            ``cuda.select_device``.

    Returns:
        np.ndarray: float32 array of shape [N, K], independent of the input
        dtype. For N == 0 or K == 0 the empty array is returned before any
        CUDA call is made.
    """
    # float32 copies are made here; the arguments are not mutated.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up lives elsewhere in the project; presumably ceiling division.
    blocks_per_grid = (div_up(N, threads_per_block),
                       div_up(K, threads_per_block))
    stream = cuda.stream()
    # Exiting the context waits for the queued copies and the kernel.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # Copying into the flat view of iou populates the [N, K] array.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # iou was created as float32, so it is returned without a cast.
    return iou
示例9: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Pairwise IoU of rotated boxes, computed by a numba.cuda kernel.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Not interpreted here; it
            is forwarded to ``rotate_iou_kernel_eval``.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: [N, K] array of dtype float32 (never the input dtype).
        When either input has zero rows, the empty array is returned and
        the GPU is not used.
    """
    # Cast to float32; astype copies, so the inputs stay as they were.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is an external helper (presumably ceiling division -- confirm).
    blocks_per_grid = (div_up(N, threads_per_block),
                       div_up(K, threads_per_block))
    stream = cuda.stream()
    # All stream work is complete once the with-block has been left.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # The destination is a flat view of iou, so iou receives the result.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # Already float32: return as-is instead of a redundant astype.
    return iou
示例10: nms_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def nms_gpu(dets, nms_overlap_thresh, device_id=0):
    """Non-maximum suppression executed by a numba.cuda kernel.

    Args:
        dets (np.ndarray): 2-D array, one detection per row. Rows are
            ranked by column 4 (descending); the other columns are
            presumably box coordinates -- confirm against ``nms_kernel``.
        nms_overlap_thresh (float): Passed through to ``nms_kernel``;
            presumably the overlap limit for suppression.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        list: row indices of ``dets`` that were kept, as selected by
        ``nms_postprocess``. ``[]`` is returned immediately for an empty
        ``dets``; no CUDA call is made in that case.
    """
    boxes_num = dets.shape[0]
    if boxes_num == 0:
        # Empty input: skip the GPU entirely rather than configuring a
        # launch with a zero-sized grid.
        return []
    keep_out = np.zeros([boxes_num], dtype=np.int32)
    scores = dets[:, 4]
    # Descending score order.
    order = scores.argsort()[::-1].astype(np.int32)
    boxes_host = dets[order, :]
    threads_per_block = 8 * 8
    # div_up is a project helper; presumably ceiling division -- confirm.
    col_blocks = div_up(boxes_num, threads_per_block)
    cuda.select_device(device_id)
    # One run of col_blocks uint64 words per box, filled in by the kernel.
    mask_host = np.zeros((boxes_num * col_blocks,), dtype=np.uint64)
    blocks_per_grid = (col_blocks, col_blocks)
    stream = cuda.stream()
    # Leaving the context waits for the transfers and the kernel.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream)
        mask_dev = cuda.to_device(mask_host, stream)
        nms_kernel[blocks_per_grid, threads_per_block, stream](
            boxes_num, nms_overlap_thresh, boxes_dev, mask_dev
        )
        mask_dev.copy_to_host(mask_host, stream=stream)
    # keep_out is filled by nms_postprocess; num_out entries are valid.
    num_out = nms_postprocess(keep_out, mask_host, boxes_num)
    keep = keep_out[:num_out]
    # Translate positions in the sorted list back to rows of dets.
    return list(order[keep])
示例11: rotate_nms_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0):
    """Rotated-box non-maximum suppression executed by a numba.cuda kernel.

    WARNING (from the original author): this function can provide the right
    result, but its performance has not been tested.

    Args:
        dets (np.ndarray): 2-D array, one detection per row. Rows are
            ranked by column 5 (descending); the other columns are
            presumably the rotated-box parameters -- confirm against
            ``rotate_nms_kernel``.
        nms_overlap_thresh (float): Passed through to
            ``rotate_nms_kernel``; presumably the overlap limit for
            suppression.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        list: row indices of ``dets`` that were kept, as selected by
        ``nms_postprocess``. ``[]`` is returned immediately for an empty
        ``dets``; no CUDA call is made in that case.
    """
    boxes_num = dets.shape[0]
    if boxes_num == 0:
        # Empty input: skip the GPU entirely rather than configuring a
        # launch with a zero-sized grid.
        return []
    # Work on a float32 copy so the caller's array is untouched.
    dets = dets.astype(np.float32)
    keep_out = np.zeros([boxes_num], dtype=np.int32)
    scores = dets[:, 5]
    # Descending score order.
    order = scores.argsort()[::-1].astype(np.int32)
    boxes_host = dets[order, :]
    threads_per_block = 8 * 8
    # div_up is a project helper; presumably ceiling division -- confirm.
    col_blocks = div_up(boxes_num, threads_per_block)
    cuda.select_device(device_id)
    # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64)
    mask_host = np.zeros((boxes_num * col_blocks,), dtype=np.uint64)
    blocks_per_grid = (col_blocks, col_blocks)
    stream = cuda.stream()
    # Leaving the context waits for the transfers and the kernel.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream)
        mask_dev = cuda.to_device(mask_host, stream)
        rotate_nms_kernel[blocks_per_grid, threads_per_block, stream](
            boxes_num, nms_overlap_thresh, boxes_dev, mask_dev
        )
        mask_dev.copy_to_host(mask_host, stream=stream)
    # keep_out is filled by nms_postprocess; num_out entries are valid.
    num_out = nms_postprocess(keep_out, mask_host, boxes_num)
    keep = keep_out[:num_out]
    # Translate positions in the sorted list back to rows of dets.
    return list(order[keep])
示例12: rotate_iou_gpu
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu(boxes, query_boxes, device_id=0):
    """Rotated-box IoU between every box and every query box, run on the GPU.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): second set of boxes; presumably
            the same layout as ``boxes`` -- confirm against the kernel.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: [N, K] float32 array. The result is float32 whatever
        dtype the inputs had. When N or K is 0 it is returned immediately
        and no CUDA call is made.
    """
    # astype returns new float32 arrays, so the inputs are not modified.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N, K = boxes.shape[0], query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up comes from elsewhere in the project (presumably ceil-divide).
    blocks_per_grid = (
        div_up(N, threads_per_block),
        div_up(K, threads_per_block),
    )
    stream = cuda.stream()
    # Leaving the context waits for all work queued on the stream.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev
        )
        # Copy into a flat view of iou so the 2-D array receives the data.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # No trailing cast: iou was allocated as float32 above.
    return iou
示例13: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute an [N, K] matrix of rotated-box IoU values on the GPU.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Given unchanged to
            ``rotate_iou_kernel_eval``, which decides how it is used.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: float32 array of shape [N, K]; the dtype does not
        follow the input dtype. With N == 0 or K == 0 the empty array is
        returned up front and no GPU work is issued.
    """
    # Make float32 copies; the arrays passed in are left as they are.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is defined outside this function; presumably ceiling division.
    blocks_per_grid = (
        div_up(N, threads_per_block),
        div_up(K, threads_per_block),
    )
    stream = cuda.stream()
    # The stream is synchronized when the with-block is exited.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion
        )
        # iou.reshape([-1]) is a view, so the device data lands in iou.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # iou is float32 by construction; no cast required.
    return iou
示例14: rotate_iou_gpu_eval
# 需要導入模塊: from numba import cuda [as 別名]
# 或者: from numba.cuda import select_device [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Run the rotated-box IoU evaluation kernel over all box pairs.

    Converted from [this project](
    https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive)
        query_boxes (float array: [K, 5]): boxes compared against ``boxes``;
            presumably the same layout -- confirm against the kernel.
        criterion (int, optional): Defaults to -1. Opaque to this function;
            it is forwarded to ``rotate_iou_kernel_eval``.
        device_id (int, optional): Defaults to 0. Device index passed to
            ``cuda.select_device``.

    Returns:
        np.ndarray: float32 array of shape [N, K], whatever the dtype of
        ``boxes``. If either input has no rows, the empty array is returned
        before any CUDA call.
    """
    # Operate on float32 copies so the caller's data is never modified.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou
    threads_per_block = 8 * 8
    cuda.select_device(device_id)
    # div_up is provided elsewhere; presumably ceiling division -- confirm.
    blocks_per_grid = (
        div_up(N, threads_per_block),
        div_up(K, threads_per_block),
    )
    stream = cuda.stream()
    # On exit the context manager waits for everything queued on the stream.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blocks_per_grid, threads_per_block, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion
        )
        # The flat view aliases iou, so the copy-back fills iou directly.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    # Returned as allocated (float32); a cast to boxes.dtype would change nothing.
    return iou