當前位置: 首頁>>代碼示例>>Python>>正文


Python numba.cuda方法代碼示例

本文整理匯總了Python中numba.cuda方法的典型用法代碼示例。如果您正苦於以下問題:Python numba.cuda方法的具體用法?Python numba.cuda怎麼用?Python numba.cuda使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所屬的numba模塊的用法示例。


在下文中一共展示了numba.cuda方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: quadrilateral_intersection

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Collect candidate vertices of the overlap of two quadrilaterals.

    Points are appended to ``int_pts`` as consecutive (x, y) pairs in this
    order: for each corner index, the corner of ``pts1`` if
    ``point_in_quadrilateral`` accepts it against ``pts2``, then the corner
    of ``pts2`` if accepted against ``pts1``; afterwards every edge/edge
    crossing reported by ``line_segment_intersection``.

    Args:
        pts1: flat corner coordinates, read at indices 0..7 as
            x0, y0, ..., x3, y3.
        pts2: second quadrilateral, same layout as ``pts1``.
        int_pts: output buffer written in place; it is not bounds-checked
            here, so the caller must size it for every point found.

    Returns:
        The number of points written to ``int_pts``.
    """
    count = 0
    for corner in range(4):
        ix = 2 * corner
        iy = ix + 1
        if point_in_quadrilateral(pts1[ix], pts1[iy], pts2):
            int_pts[count * 2] = pts1[ix]
            int_pts[count * 2 + 1] = pts1[iy]
            count += 1
        if point_in_quadrilateral(pts2[ix], pts2[iy], pts1):
            int_pts[count * 2] = pts2[ix]
            int_pts[count * 2 + 1] = pts2[iy]
            count += 1

    # Scratch slot that line_segment_intersection fills with one crossing.
    crossing = cuda.local.array((2, ), dtype=numba.float32)
    for edge1 in range(4):
        for edge2 in range(4):
            if line_segment_intersection(pts1, pts2, edge1, edge2, crossing):
                int_pts[count * 2] = crossing[0]
                int_pts[count * 2 + 1] = crossing[1]
                count += 1

    return count
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:23,代碼來源:rotate_iou.py

示例2: rbbox_to_corners

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rbbox_to_corners(corners, rbbox):
    """Write the four corners of a rotated box into ``corners``.

    Args:
        corners: output buffer; entries ``2 * i`` and ``2 * i + 1`` receive
            the x and y coordinate of corner ``i`` (i = 0..3).
        rbbox: box parameters read as ``[center_x, center_y, x_extent,
            y_extent, angle]``; the angle is passed to ``math.cos`` /
            ``math.sin``, i.e. it is in radians.
    """
    center_x = rbbox[0]
    center_y = rbbox[1]
    x_d = rbbox[2]
    y_d = rbbox[3]
    angle = rbbox[4]
    a_cos = math.cos(angle)
    a_sin = math.sin(angle)

    # Axis-aligned corner offsets from the centre, listed clockwise
    # (in a y-up frame): (-x, -y), (-x, +y), (+x, +y), (+x, -y).
    off_x = cuda.local.array((4, ), dtype=numba.float32)
    off_y = cuda.local.array((4, ), dtype=numba.float32)
    off_x[0] = -x_d / 2
    off_y[0] = -y_d / 2
    off_x[1] = -x_d / 2
    off_y[1] = y_d / 2
    off_x[2] = x_d / 2
    off_y[2] = y_d / 2
    off_x[3] = x_d / 2
    off_y[3] = -y_d / 2

    # Rotate each offset clockwise by `angle`, then translate to the centre.
    for k in range(4):
        corners[2 * k] = a_cos * off_x[k] + a_sin * off_y[k] + center_x
        corners[2 * k + 1] = -a_sin * off_x[k] + a_cos * off_y[k] + center_y
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:26,代碼來源:rotate_iou.py

示例3: quadrilateral_intersection

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def quadrilateral_intersection(pts1, pts2, int_pts):
    """Gather the points that bound the overlap of two quadrilaterals.

    ``int_pts`` is filled in place with (x, y) pairs: corners of either
    quadrilateral that ``point_in_quadrilateral`` reports as inside the
    other one, followed by the edge crossings found by
    ``line_segment_intersection`` over all 4 x 4 edge pairs.

    Args:
        pts1: flat array holding x0, y0, ..., x3, y3 (indices 0..7 are read).
        pts2: flat array with the same layout as ``pts1``.
        int_pts: destination buffer; no bounds check is done here.

    Returns:
        How many points were stored in ``int_pts``.
    """
    n_found = 0
    for vertex in range(4):
        x_idx = 2 * vertex
        y_idx = 2 * vertex + 1
        if point_in_quadrilateral(pts1[x_idx], pts1[y_idx], pts2):
            int_pts[n_found * 2] = pts1[x_idx]
            int_pts[n_found * 2 + 1] = pts1[y_idx]
            n_found += 1
        if point_in_quadrilateral(pts2[x_idx], pts2[y_idx], pts1):
            int_pts[n_found * 2] = pts2[x_idx]
            int_pts[n_found * 2 + 1] = pts2[y_idx]
            n_found += 1

    # Per-thread scratch pair receiving one crossing point at a time.
    hit = cuda.local.array((2,), dtype=numba.float32)
    for e1 in range(4):
        for e2 in range(4):
            found = line_segment_intersection(pts1, pts2, e1, e2, hit)
            if found:
                int_pts[n_found * 2] = hit[0]
                int_pts[n_found * 2 + 1] = hit[1]
                n_found += 1

    return n_found
開發者ID:ucbdrive,項目名稱:3d-vehicle-tracking,代碼行數:23,代碼來源:rotate_iou.py

示例4: rbbox_to_corners

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rbbox_to_corners(corners, rbbox):
    """Compute the corner coordinates of a rotated box.

    Args:
        corners: output buffer of x/y pairs; slot ``2 * i`` gets the x and
            slot ``2 * i + 1`` the y coordinate of corner ``i`` (i = 0..3).
        rbbox: five values read by index: centre x, centre y, extent along
            x, extent along y, and the rotation angle in radians.
    """
    # Unpack the box description.
    center_x = rbbox[0]
    center_y = rbbox[1]
    x_d = rbbox[2]
    y_d = rbbox[3]
    angle = rbbox[4]

    a_sin = math.sin(angle)
    a_cos = math.cos(angle)

    # Unrotated corner offsets relative to the centre, in clockwise order
    # (for a y-up frame).
    local_x = cuda.local.array((4,), dtype=numba.float32)
    local_y = cuda.local.array((4,), dtype=numba.float32)
    local_x[0] = -x_d / 2
    local_x[1] = -x_d / 2
    local_x[2] = x_d / 2
    local_x[3] = x_d / 2
    local_y[0] = -y_d / 2
    local_y[1] = y_d / 2
    local_y[2] = y_d / 2
    local_y[3] = -y_d / 2

    # Apply the clockwise rotation and shift by the centre.
    for c in range(4):
        corners[2 * c] = a_cos * local_x[c] + a_sin * local_y[c] + center_x
        corners[2 * c + 1] = -a_sin * local_x[c] + a_cos * local_y[c] + center_y
開發者ID:ucbdrive,項目名稱:3d-vehicle-tracking,代碼行數:26,代碼來源:rotate_iou.py

示例5: rbbox_to_corners

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rbbox_to_corners(corners, rbbox):
    """Fill ``corners`` with the rotated corner points of ``rbbox``.

    Args:
        corners: flat output buffer; corner ``i`` is stored as
            ``corners[2 * i]`` (x) and ``corners[2 * i + 1]`` (y).
        rbbox: ``[center_x, center_y, x_extent, y_extent, angle]`` with the
            angle in radians (it is fed to ``math.cos`` / ``math.sin``).
    """
    angle = rbbox[4]
    a_cos = math.cos(angle)
    a_sin = math.sin(angle)
    center_x = rbbox[0]
    center_y = rbbox[1]
    x_d = rbbox[2]
    y_d = rbbox[3]

    # Corner offsets before rotation; float32 scratch storage per thread.
    # Order: (-x, -y), (-x, +y), (+x, +y), (+x, -y).
    base_x = cuda.local.array((4,), dtype=numba.float32)
    base_y = cuda.local.array((4,), dtype=numba.float32)
    base_x[0] = -x_d / 2
    base_y[0] = -y_d / 2
    base_x[1] = -x_d / 2
    base_y[1] = y_d / 2
    base_x[2] = x_d / 2
    base_y[2] = y_d / 2
    base_x[3] = x_d / 2
    base_y[3] = -y_d / 2

    # Rotate clockwise by `angle` and translate to the box centre.
    for n in range(4):
        corners[2 * n] = a_cos * base_x[n] + a_sin * base_y[n] + center_x
        corners[2 * n + 1] = -a_sin * base_x[n] + a_cos * base_y[n] + center_y
開發者ID:mit-han-lab,項目名稱:pvcnn,代碼行數:24,代碼來源:iou.py

示例6: rbbox_to_corners

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rbbox_to_corners(corners, rbbox):
    """Expand a rotated box into its four corner points.

    Args:
        corners: output buffer holding interleaved x/y values; indices
            ``2 * i`` and ``2 * i + 1`` are written for i = 0..3.
        rbbox: box given as centre (indices 0, 1), extents along x and y
            (indices 2, 3) and rotation angle in radians (index 4).
    """
    x_d = rbbox[2]
    y_d = rbbox[3]
    center_x = rbbox[0]
    center_y = rbbox[1]
    angle = rbbox[4]
    a_cos = math.cos(angle)
    a_sin = math.sin(angle)

    # Offsets of the unrotated corners from the centre, clockwise in a
    # y-up frame, kept in per-thread float32 scratch arrays.
    rel_x = cuda.local.array((4, ), dtype=numba.float32)
    rel_y = cuda.local.array((4, ), dtype=numba.float32)
    rel_x[0] = -x_d / 2
    rel_x[1] = -x_d / 2
    rel_x[2] = x_d / 2
    rel_x[3] = x_d / 2
    rel_y[0] = -y_d / 2
    rel_y[1] = y_d / 2
    rel_y[2] = y_d / 2
    rel_y[3] = -y_d / 2

    # Clockwise rotation followed by translation to the centre.
    for idx in range(4):
        corners[2 * idx] = a_cos * rel_x[idx] + a_sin * rel_y[idx] + center_x
        corners[2 * idx + 1] = -a_sin * rel_x[idx] + a_cos * rel_y[idx] + center_y
開發者ID:traveller59,項目名稱:second.pytorch,代碼行數:25,代碼來源:nms_gpu.py

示例7: test_rotor_between_lines

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def test_rotor_between_lines(self):
        """Compare ``rotor_between_lines_kernel`` with the host routine.

        Builds two batches of 1000 random line value arrays, runs the CUDA
        kernel on the whole batch, recomputes every row with
        ``val_rotor_between_lines`` and asserts that both results agree.
        The wall-clock time of each path is printed.
        """
        # Two batches of random input data
        n_mvs = 1000
        lines_a = np.array([random_line().value for _ in range(n_mvs)])
        lines_b = np.array([random_line().value for _ in range(n_mvs)])

        kernel_out = np.zeros(lines_b.shape)
        host_out = np.zeros(lines_b.shape)

        print('Starting kernel')
        start = time.time()
        blockdim = 64
        # Enough blocks to cover every row (ceiling division).
        griddim = (n_mvs + blockdim - 1) // blockdim
        rotor_between_lines_kernel[griddim, blockdim](lines_a, lines_b, kernel_out)
        end_time = time.time() - start
        print('Kernel finished')
        print(end_time)

        # Reference result computed row by row without CUDA
        start = time.time()
        for row, (line_a, line_b) in enumerate(zip(lines_a, lines_b)):
            host_out[row, :] = val_rotor_between_lines(line_a, line_b)
        print(time.time() - start)

        np.testing.assert_almost_equal(kernel_out, host_out)
開發者ID:pygae,項目名稱:clifford,代碼行數:27,代碼來源:test_g3c_CUDA.py

示例8: test_normalise_mvs_kernel

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def test_normalise_mvs_kernel(self):
        """Compare ``normalise_mvs_kernel`` with ``MultiVector.normal()``.

        The kernel receives only the input array and the test then reads
        its result from that same array, so an untouched copy of the input
        is taken first and used for the host-side reference computation.
        """
        n_mvs = 500
        kernel_data = np.pi*np.array([random_line().value for _ in range(n_mvs)])
        expected = np.zeros(kernel_data.shape)
        # Snapshot of the input before the kernel overwrites it.
        pristine = kernel_data.copy()

        print('Starting kernel')
        start = time.time()
        blockdim = 64
        # Enough blocks to cover every row (ceiling division).
        griddim = (n_mvs + blockdim - 1) // blockdim
        normalise_mvs_kernel[griddim, blockdim](kernel_data)
        end_time = time.time() - start
        print('Kernel finished')
        print(end_time)

        # Reference result computed row by row without CUDA
        start = time.time()
        for row in range(kernel_data.shape[0]):
            mv = cf.MultiVector(self.layout, pristine[row, :])
            expected[row, :] = mv.normal().value
        print(time.time() - start)

        np.testing.assert_almost_equal(kernel_data, expected)
開發者ID:pygae,項目名稱:clifford,代碼行數:26,代碼來源:test_g3c_CUDA.py

示例9: test_gp

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def test_gp(self):
        """Compare ``gp_kernel`` with ``self.layout.gmt_func`` row by row.

        Generates two batches of 500 random multivector value arrays, runs
        the CUDA kernel on the batch, recomputes each product on the host
        and asserts the two outputs agree. Timings of both paths are printed.
        """
        n_mvs = 500
        left = np.array([self.layout.randomMV().value for _ in range(n_mvs)])
        right = np.array([self.layout.randomMV().value for _ in range(n_mvs)])

        kernel_out = np.zeros(right.shape)
        host_out = np.zeros(right.shape)

        print('Starting kernel')
        start = time.time()
        blockdim = 64
        # Enough blocks to cover every row (ceiling division).
        griddim = (n_mvs + blockdim - 1) // blockdim
        gp_kernel[griddim, blockdim](left, right, kernel_out)
        end_time = time.time() - start
        print('Kernel finished')
        print(end_time)

        # Reference result computed row by row without CUDA
        start = time.time()
        for row, (lhs, rhs) in enumerate(zip(left, right)):
            host_out[row, :] = self.layout.gmt_func(lhs, rhs)
        print(time.time() - start)

        np.testing.assert_almost_equal(kernel_out, host_out)
開發者ID:pygae,項目名稱:clifford,代碼行數:27,代碼來源:test_g3c_CUDA.py

示例10: test_assign_objects_to_objects_cuda

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def test_assign_objects_to_objects_cuda(self):
        """Check that CUDA matching pairs every object with itself.

        For each generator in ``object_generators`` a set of 20 objects is
        matched against a shallow copy of itself with
        ``assign_measurements_to_objects_matrix(..., cuda=True)``; the
        returned labels must be 0, 1, ..., len - 1.
        """
        n_repeats = 5

        # First pass: a failed comparison is retried once before failing.
        # NOTE(review): presumably the retry tolerates an occasional bad
        # first result from the matcher - TODO confirm why it is needed.
        for obj_gen in object_generators:
            print(obj_gen.__name__)
            for _ in range(n_repeats):
                object_set_a = [obj_gen() for _ in range(20)]
                object_set_b = list(object_set_a)
                label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
                try:
                    np.testing.assert_equal(label_a, np.array(range(len(label_a))))
                except AssertionError:
                    label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
                    np.testing.assert_equal(label_a, np.array(range(len(label_a))))

        # Second pass: same check, no retry.
        for obj_gen in object_generators:
            print(obj_gen.__name__)
            for _ in range(n_repeats):
                # NOTE(review): this rotor is generated but never applied to
                # object_set_b; presumably the second set was meant to be a
                # perturbed copy - TODO confirm. The call is kept so that any
                # random state it consumes is unchanged.
                unused_rotor = random_rotation_translation_rotor(0.001, np.pi / 32)

                object_set_a = [obj_gen() for _ in range(20)]
                object_set_b = list(object_set_a)
                label_a, costs_a = assign_measurements_to_objects_matrix(object_set_a, object_set_b, cuda=True)
                np.testing.assert_equal(label_a, np.array(range(len(label_a))))
開發者ID:pygae,項目名稱:clifford,代碼行數:26,代碼來源:test_g3c_CUDA.py

示例11: sort_vertex_in_convex_polygon

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def sort_vertex_in_convex_polygon(int_pts, num_of_inter):
    """Sort the first ``num_of_inter`` points of ``int_pts`` around their mean.

    Each point gets a scalar key derived from the unit vector pointing from
    the mean of all points to that point: the x component when the
    y component is non-negative, otherwise ``-2 - x``. The points are then
    reordered in place by ascending key with an insertion sort.

    Args:
        int_pts: flat buffer of x/y pairs (point ``i`` at ``2 * i`` and
            ``2 * i + 1``); modified in place.
        num_of_inter: number of valid points in ``int_pts``. Nothing happens
            unless it is greater than zero. The key buffer holds 16 entries
            and is not bounds-checked.
    """
    if not (num_of_inter > 0):
        return

    # Mean of the points, accumulated in float32 scratch storage.
    centroid = cuda.local.array((2, ), dtype=numba.float32)
    centroid[0] = 0.0
    centroid[1] = 0.0
    for i in range(num_of_inter):
        centroid[0] += int_pts[2 * i]
        centroid[1] += int_pts[2 * i + 1]
    centroid[0] /= num_of_inter
    centroid[1] /= num_of_inter

    # One sort key per point.
    direction = cuda.local.array((2, ), dtype=numba.float32)
    keys = cuda.local.array((16, ), dtype=numba.float32)
    for i in range(num_of_inter):
        direction[0] = int_pts[2 * i] - centroid[0]
        direction[1] = int_pts[2 * i + 1] - centroid[1]
        length = math.sqrt(direction[0] * direction[0] + direction[1] * direction[1])
        direction[0] = direction[0] / length
        direction[1] = direction[1] / length
        if direction[1] < 0:
            # Lower half-plane: remap the key so it sorts below the upper half.
            direction[0] = -2 - direction[0]
        keys[i] = direction[0]

    # Insertion sort on the keys, moving the matching x/y pair along.
    slot = 0
    held_key = 0
    for i in range(1, num_of_inter):
        if keys[i - 1] > keys[i]:
            held_key = keys[i]
            held_x = int_pts[2 * i]
            held_y = int_pts[2 * i + 1]
            slot = i
            while slot > 0 and keys[slot - 1] > held_key:
                keys[slot] = keys[slot - 1]
                int_pts[slot * 2] = int_pts[slot * 2 - 2]
                int_pts[slot * 2 + 1] = int_pts[slot * 2 - 1]
                slot -= 1

            keys[slot] = held_key
            int_pts[slot * 2] = held_x
            int_pts[slot * 2 + 1] = held_y
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:39,代碼來源:rotate_iou.py

示例12: line_segment_intersection

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def line_segment_intersection(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of ``pts1`` with edge ``j`` of ``pts2``.

    Edge ``k`` of a quadrilateral runs from corner ``k`` to corner
    ``(k + 1) % 4``; corners are stored as interleaved x/y values.

    Args:
        pts1: flat corner coordinates of the first quadrilateral.
        pts2: flat corner coordinates of the second quadrilateral.
        i: edge index into ``pts1``.
        j: edge index into ``pts2``.
        temp_pts: two-element output; receives the crossing point (x, y)
            only when the function returns True.

    Returns:
        True if both orientation tests below indicate that the two segments
        cross, False otherwise.
    """
    # Segment A-B is the edge of pts1, segment C-D the edge of pts2.
    A = cuda.local.array((2, ), dtype=numba.float32)
    B = cuda.local.array((2, ), dtype=numba.float32)
    C = cuda.local.array((2, ), dtype=numba.float32)
    D = cuda.local.array((2, ), dtype=numba.float32)

    i_next = (i + 1) % 4
    j_next = (j + 1) % 4

    A[0] = pts1[2 * i]
    A[1] = pts1[2 * i + 1]
    B[0] = pts1[2 * i_next]
    B[1] = pts1[2 * i_next + 1]
    C[0] = pts2[2 * j]
    C[1] = pts2[2 * j + 1]
    D[0] = pts2[2 * j_next]
    D[1] = pts2[2 * j_next + 1]

    BA0 = B[0] - A[0]
    BA1 = B[1] - A[1]
    DA0 = D[0] - A[0]
    CA0 = C[0] - A[0]
    DA1 = D[1] - A[1]
    CA1 = C[1] - A[1]

    # A and B must give different orientation results against C-D.
    acd = DA1 * CA0 > CA1 * DA0
    bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0])
    if acd == bcd:
        return False

    # C and D must give different orientation results against A-B.
    abc = CA1 * BA0 > BA1 * CA0
    abd = DA1 * BA0 > BA1 * DA0
    if abc == abd:
        return False

    # Solve for the crossing of the two supporting lines (determinant form).
    DC0 = D[0] - C[0]
    DC1 = D[1] - C[1]
    ABBA = A[0] * B[1] - B[0] * A[1]
    CDDC = C[0] * D[1] - D[0] * C[1]
    DH = BA1 * DC0 - BA0 * DC1
    Dx = ABBA * DC0 - BA0 * CDDC
    Dy = ABBA * DC1 - BA1 * CDDC
    temp_pts[0] = Dx / DH
    temp_pts[1] = Dy / DH
    return True
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:42,代碼來源:rotate_iou.py

示例13: line_segment_intersection_v1

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts):
    """Intersect edge ``i`` of ``pts1`` with edge ``j`` of ``pts2`` via areas.

    Uses the values returned by ``trangle_area`` for the triangles spanned
    by the segment end points: the segments are treated as crossing only
    when the areas on either side of each segment have strictly opposite
    signs.

    Args:
        pts1: flat corner coordinates (x/y interleaved) of the first shape.
        pts2: flat corner coordinates of the second shape.
        i: edge index into ``pts1``; the edge ends at corner ``(i + 1) % 4``.
        j: edge index into ``pts2``; the edge ends at corner ``(j + 1) % 4``.
        temp_pts: two-element output, written only when True is returned.

    Returns:
        True if a crossing point was written to ``temp_pts``, else False.
    """
    # Segment p-q is the edge of pts1, segment r-s the edge of pts2.
    p = cuda.local.array((2, ), dtype=numba.float32)
    q = cuda.local.array((2, ), dtype=numba.float32)
    r = cuda.local.array((2, ), dtype=numba.float32)
    s = cuda.local.array((2, ), dtype=numba.float32)

    i_next = (i + 1) % 4
    j_next = (j + 1) % 4

    p[0] = pts1[2 * i]
    p[1] = pts1[2 * i + 1]
    q[0] = pts1[2 * i_next]
    q[1] = pts1[2 * i_next + 1]
    r[0] = pts2[2 * j]
    r[1] = pts2[2 * j + 1]
    s[0] = pts2[2 * j_next]
    s[1] = pts2[2 * j_next + 1]

    area_pqr = trangle_area(p, q, r)
    area_pqs = trangle_area(p, q, s)

    # r and s must yield areas of opposite sign relative to p-q.
    if area_pqr * area_pqs >= 0:
        return False

    area_rsp = trangle_area(r, s, p)
    # The area for (r, s, q) is derived from the three already known.
    area_rsq = area_rsp + area_pqr - area_pqs

    # p and q must yield areas of opposite sign relative to r-s.
    if area_rsp * area_rsq >= 0:
        return False

    # Fraction of the way from p to q at which the crossing lies.
    t = area_rsp / (area_pqs - area_pqr)

    dx = t * (q[0] - p[0])
    dy = t * (q[1] - p[1])
    temp_pts[0] = p[0] + dx
    temp_pts[1] = p[1] + dy
    return True
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:38,代碼來源:rotate_iou.py

示例14: rotate_iou_kernel_eval

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rotate_iou_kernel_eval(N, K, dev_boxes, dev_query_boxes, dev_iou, criterion=-1):
    """CUDA kernel: fill one 64 x 64 tile of the box / query-box result matrix.

    Block (x, y) handles boxes ``64 * x ...`` against query boxes
    ``64 * y ...``. Each thread first stages one box and one query box
    (5 values each) in shared memory, then, after a block-wide barrier,
    computes one row of the tile with ``devRotateIoUEval``.

    Args:
        N: number of boxes.
        K: number of query boxes.
        dev_boxes: flat array, 5 consecutive values per box.
        dev_query_boxes: flat array, 5 consecutive values per query box.
        dev_iou: flat output; the value for box ``n`` and query box ``k``
            is stored at index ``n * K + k``.
        criterion: forwarded unchanged to ``devRotateIoUEval``.
    """
    threads_per_block = 8 * 8
    row_start = cuda.blockIdx.x
    col_start = cuda.blockIdx.y
    tx = cuda.threadIdx.x

    # Number of valid rows / columns in this tile (the last tile may be short).
    row_size = min(N - row_start * threads_per_block, threads_per_block)
    col_size = min(K - col_start * threads_per_block, threads_per_block)

    block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)
    block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32)

    dev_query_box_idx = threads_per_block * col_start + tx
    dev_box_idx = threads_per_block * row_start + tx

    # Stage this thread's query box and box into shared memory.
    if tx < col_size:
        for k in range(5):
            block_qboxes[tx * 5 + k] = dev_query_boxes[dev_query_box_idx * 5 + k]
    if tx < row_size:
        for k in range(5):
            block_boxes[tx * 5 + k] = dev_boxes[dev_box_idx * 5 + k]

    # Every thread must have finished staging before any tile entry is read.
    cuda.syncthreads()

    if tx < row_size:
        # Start of this thread's output row inside the flat result array.
        row_base = row_start * threads_per_block * K + col_start * threads_per_block + tx * K
        box_lo = tx * 5
        for i in range(col_size):
            qbox_lo = i * 5
            dev_iou[row_base + i] = devRotateIoUEval(
                block_qboxes[qbox_lo:qbox_lo + 5],
                block_boxes[box_lo:box_lo + 5], criterion)
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:32,代碼來源:rotate_iou.py

示例15: rotate_iou_gpu_eval

# 需要導入模塊: import numba [as 別名]
# 或者: from numba import cuda [as 別名]
def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0):
    """Compute the pairwise rotated-box IoU matrix on the GPU.

    Converted from [this project](
        https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation).

    Args:
        boxes (float array: [N, 5]): rbboxes. format: centers, dims,
            angles(clockwise when positive). Converted to float32 before
            being copied to the device.
        query_boxes (float array: [K, 5]): rbboxes in the same format as
            ``boxes``; also converted to float32.
        criterion (int, optional): Defaults to -1. Forwarded unchanged to
            ``rotate_iou_kernel_eval``, which defines its meaning.
        device_id (int, optional): Defaults to 0. CUDA device selected with
            ``cuda.select_device`` before any transfer or launch.

    Returns:
        np.ndarray: float32 array of shape [N, K]; entry (n, k) is the value
        computed for ``boxes[n]`` and ``query_boxes[k]``. When N or K is
        zero the (empty) array is returned without touching the GPU.
    """
    # The kernel works on float32 data, so inputs are converted up front.
    # NOTE: the result has always been float32 regardless of the input
    # dtype (the previous final cast used the already-converted `boxes`),
    # so it is returned as-is instead of through a no-op cast.
    boxes = boxes.astype(np.float32)
    query_boxes = query_boxes.astype(np.float32)
    N = boxes.shape[0]
    K = query_boxes.shape[0]
    iou = np.zeros((N, K), dtype=np.float32)
    if N == 0 or K == 0:
        return iou

    threadsPerBlock = 8 * 8
    cuda.select_device(device_id)
    # One block per 64 x 64 tile of the [N, K] result.
    blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock))

    stream = cuda.stream()
    # auto_synchronize waits for the stream on exit, so `iou` is fully
    # populated once the with-block ends.
    with stream.auto_synchronize():
        boxes_dev = cuda.to_device(boxes.reshape([-1]), stream)
        query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream)
        iou_dev = cuda.to_device(iou.reshape([-1]), stream)
        rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream](
            N, K, boxes_dev, query_boxes_dev, iou_dev, criterion)
        # `iou` is contiguous, so the reshape is a view and the copy lands
        # directly in `iou`.
        iou_dev.copy_to_host(iou.reshape([-1]), stream=stream)
    return iou
開發者ID:traveller59,項目名稱:kitti-object-eval-python,代碼行數:38,代碼來源:rotate_iou.py


注:本文中的numba.cuda方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。