本文整理汇总了Python中numba.cuda方法的典型用法代码示例。如果您正苦于以下问题:Python numba.cuda方法的具体用法?Python numba.cuda怎么用?Python numba.cuda使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类numba
的用法示例。
在下文中一共展示了numba.cuda方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: quadrilateral_intersection
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def quadrilateral_intersection(pts1, pts2, int_pts):
num_of_inter = 0
for i in range(4):
if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2):
int_pts[num_of_inter * 2] = pts1[2 * i]
int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]
num_of_inter += 1
if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1):
int_pts[num_of_inter * 2] = pts2[2 * i]
int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]
num_of_inter += 1
temp_pts = cuda.local.array((2, ), dtype=numba.float32)
for i in range(4):
for j in range(4):
has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts)
if has_pts:
int_pts[num_of_inter * 2] = temp_pts[0]
int_pts[num_of_inter * 2 + 1] = temp_pts[1]
num_of_inter += 1
return num_of_inter
示例2: rbbox_to_corners
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rbbox_to_corners(corners, rbbox):
# generate clockwise corners and rotate it clockwise
angle = rbbox[4]
a_cos = math.cos(angle)
a_sin = math.sin(angle)
center_x = rbbox[0]
center_y = rbbox[1]
x_d = rbbox[2]
y_d = rbbox[3]
corners_x = cuda.local.array((4, ), dtype=numba.float32)
corners_y = cuda.local.array((4, ), dtype=numba.float32)
corners_x[0] = -x_d / 2
corners_x[1] = -x_d / 2
corners_x[2] = x_d / 2
corners_x[3] = x_d / 2
corners_y[0] = -y_d / 2
corners_y[1] = y_d / 2
corners_y[2] = y_d / 2
corners_y[3] = -y_d / 2
for i in range(4):
corners[2 *
i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x
corners[2 * i
+ 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y
示例3: quadrilateral_intersection
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def quadrilateral_intersection(pts1, pts2, int_pts):
num_of_inter = 0
for i in range(4):
if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2):
int_pts[num_of_inter * 2] = pts1[2 * i]
int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1]
num_of_inter += 1
if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1):
int_pts[num_of_inter * 2] = pts2[2 * i]
int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1]
num_of_inter += 1
temp_pts = cuda.local.array((2,), dtype=numba.float32)
for i in range(4):
for j in range(4):
has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts)
if has_pts:
int_pts[num_of_inter * 2] = temp_pts[0]
int_pts[num_of_inter * 2 + 1] = temp_pts[1]
num_of_inter += 1
return num_of_inter
示例4: rbbox_to_corners
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rbbox_to_corners(corners, rbbox):
# generate clockwise corners and rotate it clockwise
angle = rbbox[4]
a_cos = math.cos(angle)
a_sin = math.sin(angle)
center_x = rbbox[0]
center_y = rbbox[1]
x_d = rbbox[2]
y_d = rbbox[3]
corners_x = cuda.local.array((4,), dtype=numba.float32)
corners_y = cuda.local.array((4,), dtype=numba.float32)
corners_x[0] = -x_d / 2
corners_x[1] = -x_d / 2
corners_x[2] = x_d / 2
corners_x[3] = x_d / 2
corners_y[0] = -y_d / 2
corners_y[1] = y_d / 2
corners_y[2] = y_d / 2
corners_y[3] = -y_d / 2
for i in range(4):
corners[2 *
i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x
corners[2 * i
+ 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y
示例5: rbbox_to_corners
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rbbox_to_corners(corners, rbbox):
# generate clockwise corners and rotate it clockwise
angle = rbbox[4]
a_cos = math.cos(angle)
a_sin = math.sin(angle)
center_x = rbbox[0]
center_y = rbbox[1]
x_d = rbbox[2]
y_d = rbbox[3]
corners_x = cuda.local.array((4,), dtype=numba.float32)
corners_y = cuda.local.array((4,), dtype=numba.float32)
corners_x[0] = -x_d / 2
corners_x[1] = -x_d / 2
corners_x[2] = x_d / 2
corners_x[3] = x_d / 2
corners_y[0] = -y_d / 2
corners_y[1] = y_d / 2
corners_y[2] = y_d / 2
corners_y[3] = -y_d / 2
for i in range(4):
corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x
corners[2 * i + 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y
示例6: rbbox_to_corners
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rbbox_to_corners(corners, rbbox):
# generate clockwise corners and rotate it clockwise
angle = rbbox[4]
a_cos = math.cos(angle)
a_sin = math.sin(angle)
center_x = rbbox[0]
center_y = rbbox[1]
x_d = rbbox[2]
y_d = rbbox[3]
corners_x = cuda.local.array((4, ), dtype=numba.float32)
corners_y = cuda.local.array((4, ), dtype=numba.float32)
corners_x[0] = -x_d / 2
corners_x[1] = -x_d / 2
corners_x[2] = x_d / 2
corners_x[3] = x_d / 2
corners_y[0] = -y_d / 2
corners_y[1] = y_d / 2
corners_y[2] = y_d / 2
corners_y[3] = -y_d / 2
for i in range(4):
corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x
corners[2 * i +
1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y
示例7: test_rotor_between_lines
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def test_rotor_between_lines(self):
# Make a big array of data
n_mvs = 1000
mv_a_array = np.array([random_line().value for i in range(n_mvs)])
mv_b_array = np.array([random_line().value for i in range(n_mvs)])
mv_c_array = np.zeros(mv_b_array.shape)
mv_d_array = np.zeros(mv_b_array.shape)
print('Starting kernel')
t = time.time()
blockdim = 64
griddim = int(math.ceil(n_mvs / blockdim))
rotor_between_lines_kernel[griddim, blockdim](mv_a_array, mv_b_array, mv_c_array)
end_time = time.time() - t
print('Kernel finished')
print(end_time)
# Now do the non cuda kernel
t = time.time()
for i in range(mv_a_array.shape[0]):
mv_d_array[i, :] = val_rotor_between_lines(mv_a_array[i, :], mv_b_array[i, :])
print(time.time() - t)
np.testing.assert_almost_equal(mv_c_array, mv_d_array)
示例8: test_normalise_mvs_kernel
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def test_normalise_mvs_kernel(self):
n_mvs = 500
mv_a_array = np.pi*np.array([random_line().value for i in range(n_mvs)])
mv_d_array = np.zeros(mv_a_array.shape)
mv_b_array = mv_a_array.copy()
print('Starting kernel')
t = time.time()
blockdim = 64
griddim = int(math.ceil(n_mvs / blockdim))
normalise_mvs_kernel[griddim, blockdim](mv_a_array)
end_time = time.time() - t
print('Kernel finished')
print(end_time)
# Now do the non cuda kernel
t = time.time()
for i in range(mv_a_array.shape[0]):
mv_a = cf.MultiVector(self.layout, mv_b_array[i, :])
mv_d_array[i, :] = mv_a.normal().value
print(time.time() - t)
np.testing.assert_almost_equal(mv_a_array, mv_d_array)
示例9: test_gp
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def test_gp(self):
n_mvs = 500
mv_a_array = np.array([self.layout.randomMV().value for i in range(n_mvs)])
mv_b_array = np.array([self.layout.randomMV().value for i in range(n_mvs)])
mv_c_array = np.zeros(mv_b_array.shape)
mv_d_array = np.zeros(mv_b_array.shape)
print('Starting kernel')
t = time.time()
blockdim = 64
griddim = int(math.ceil(n_mvs/blockdim))
gp_kernel[griddim, blockdim](mv_a_array, mv_b_array, mv_c_array)
end_time = time.time() - t
print('Kernel finished')
print(end_time)
# Now do the non cuda kernel
t = time.time()
for i in range(mv_a_array.shape[0]):
mv_d_array[i, :] = self.layout.gmt_func(mv_a_array[i, :], mv_b_array[i, :])
print(time.time() - t)
np.testing.assert_almost_equal(mv_c_array, mv_d_array)
示例10: test_assign_objects_to_objects_cuda
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def test_assign_objects_to_objects_cuda(self):
n_repeats = 5
for obj_gen in object_generators:
print(obj_gen.__name__)
for i in range(n_repeats):
object_set_a = [obj_gen() for i in range(20)]
object_set_b = [l for l in object_set_a]
label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
try:
np.testing.assert_equal(label_a, np.array(range(len(label_a))))
except AssertionError:
label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
np.testing.assert_equal(label_a, np.array(range(len(label_a))))
n_repeats = 5
for obj_gen in object_generators:
print(obj_gen.__name__)
for i in range(n_repeats):
r = random_rotation_translation_rotor(0.001, np.pi / 32)
object_set_a = [obj_gen() for i in range(20)]
object_set_b = [l for l in object_set_a]
label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
np.testing.assert_equal(label_a, np.array(range(len(label_a))))
示例11: sort_vertex_in_convex_polygon
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
if num_of_inter > 0:
center = cuda.local.array((2, ), dtype=numba.float32)
center[:] = 0.0
for i in range(num_of_inter):
center[0] += int_pts[2 * i]
center[1] += int_pts[2 * i + 1]
center[0] /= num_of_inter
center[1] /= num_of_inter
v = cuda.local.array((2, ), dtype=numba.float32)
vs = cuda.local.array((16, ), dtype=numba.float32)
for i in range(num_of_inter):
v[0] = int_pts[2 * i] - center[0]
v[1] = int_pts[2 * i + 1] - center[1]
d = math.sqrt(v[0] * v[0] + v[1] * v[1])
v[0] = v[0] / d
v[1] = v[1] / d
if v[1] < 0:
v[0] = -2 - v[0]
vs[i] = v[0]
j = 0
temp = 0
for i in range(1, num_of_inter):
if vs[i - 1] > vs[i]:
temp = vs[i]
tx = int_pts[2 * i]
ty = int_pts[2 * i + 1]
j = i
while j > 0 and vs[j - 1] > temp:
vs[j] = vs[j - 1]
int_pts[j * 2] = int_pts[j * 2 - 2]
int_pts[j * 2 + 1] = int_pts[j * 2 - 1]
j -= 1
vs[j] = temp
int_pts[j * 2] = tx
int_pts[j * 2 + 1] = ty
示例12: line_segment_intersection
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
A = cuda.local.array((2, ), dtype=numba.float32)
B = cuda.local.array((2, ), dtype=numba.float32)
C = cuda.local.array((2, ), dtype=numba.float32)
D = cuda.local.array((2, ), dtype=numba.float32)
A[0] = pts1[2 * i]
A[1] = pts1[2 * i + 1]
B[0] = pts1[2 * ((i + 1) % 4)]
B[1] = pts1[2 * ((i + 1) % 4) + 1]
C[0] = pts2[2 * j]
C[1] = pts2[2 * j + 1]
D[0] = pts2[2 * ((j + 1) % 4)]
D[1] = pts2[2 * ((j + 1) % 4) + 1]
BA0 = B[0] - A[0]
BA1 = B[1] - A[1]
DA0 = D[0] - A[0]
CA0 = C[0] - A[0]
DA1 = D[1] - A[1]
CA1 = C[1] - A[1]
acd = DA1 * CA0 > CA1 * DA0
bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])
if acd != bcd:
abc = CA1 * BA0 > BA1 * CA0
abd = DA1 * BA0 > BA1 * DA0
if abc != abd:
DC0 = D[0] - C[0]
DC1 = D[1] - C[1]
ABBA = A[0] * B[1] - B[0] * A[1]
CDDC = C[0] * D[1] - D[0] * C[1]
DH = BA1 * DC0 - BA0 * DC1
Dx = ABBA * DC0 - BA0 * CDDC
Dy = ABBA * DC1 - BA1 * CDDC
temp_pts[0] = Dx / DH
temp_pts[1] = Dy / DH
return True
return False
示例13: line_segment_intersection_v1
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):
a = cuda.local.array((2, ), dtype=numba.float32)
b = cuda.local.array((2, ), dtype=numba.float32)
c = cuda.local.array((2, ), dtype=numba.float32)
d = cuda.local.array((2, ), dtype=numba.float32)
a[0] = pts1[2 * i]
a[1] = pts1[2 * i + 1]
b[0] = pts1[2 * ((i + 1) % 4)]
b[1] = pts1[2 * ((i + 1) % 4) + 1]
c[0] = pts2[2 * j]
c[1] = pts2[2 * j + 1]
d[0] = pts2[2 * ((j + 1) % 4)]
d[1] = pts2[2 * ((j + 1) % 4) + 1]
area_abc = trangle_area(a, b, c)
area_abd = trangle_area(a, b, d)
if area_abc * area_abd >= 0:
return False
area_cda = trangle_area(c, d, a)
area_cdb = area_cda + area_abc - area_abd
if area_cda * area_cdb >= 0:
return False
t = area_cda / (area_abd - area_abc)
dx = t * (b[0] - a[0])
dy = t * (b[1] - a[1])
temp_pts[0] = a[0] + dx
temp_pts[1] = a[1] + dy
return True
示例14: rotate_iou_kernel_eval
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rotate_iou_kernel_eval(N, K, dev_boxes, dev_query_boxes, dev_iou, criterion=-1):
threadsPerBlock = 8 * 8
row_start = cuda.blockIdx.x
col_start = cuda.blockIdx.y
tx = cuda.threadIdx.x
row_size = min(N - row_start * threadsPerBlock, threadsPerBlock)
col_size = min(K - col_start * threadsPerBlock, threadsPerBlock)
block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)
block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)
dev_query_box_idx = threadsPerBlock * col_start + tx
dev_box_idx = threadsPerBlock * row_start + tx
if (tx < col_size):
block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0]
block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1]
block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2]
block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3]
block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4]
if (tx < row_size):
block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0]
block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1]
block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2]
block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3]
block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4]
cuda.syncthreads()
if tx < row_size:
for i in range(col_size):
offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i
dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5],
block_boxes[tx * 5:tx * 5 + 5], criterion)
示例15: rotate_iou_gpu_eval
# 需要导入模块: import numba [as 别名]
# 或者: from numba import cuda [as 别名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
"""rotated box iou running in gpu. 500x faster than cpu version
(take 5ms in one example with numba.cuda code).
convert from [this project](
https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).
Args:
boxes (float tensor: [N, 5]): rbboxes. format: centers, dims,
angles(clockwise when positive)
query_boxes (float tensor: [K, 5]): [description]
device_id (int, optional): Defaults to 0. [description]
Returns:
[type]: [description]
"""
box_dtype = boxes.dtype
boxes = boxes.astype(np.float32)
query_boxes = query_boxes.astype(np.float32)
N = boxes.shape[0]
K = query_boxes.shape[0]
iou = np.zeros((N, K), dtype=np.float32)
if N == 0 or K == 0:
return iou
threadsPerBlock = 8 * 8
cuda.select_device(device_id)
blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))
stream = cuda.stream()
with stream.auto_synchronize():
boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
iou_dev = cuda.to_device(iou.reshape([-1]), stream)
rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
return iou.astype(boxes.dtype)