本文整理汇总了 Python 中 pycuda.driver.Out 方法的典型用法代码示例。如果您正苦于以下问题:Python driver.Out 方法的具体用法?Python driver.Out 怎么用?Python driver.Out 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pycuda.driver 的用法示例。
在下文中一共展示了driver.Out方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: maximum_filter_2d
# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import Out [as 别名]
def maximum_filter_2d(arr2D, footprint):
    """Run the ``slidingMaxiumum2D`` CUDA kernel over a 2-D array.

    The kernel source is read from ``2DSlidingMaxFootprintKernel.c``, which
    must live in the same directory as this file.

    Args:
        arr2D: 2-D numpy array to filter. The caller is expected to pass
            float32 data (per the original author's note; not enforced here).
        footprint: 2-D numpy array handed to the kernel as the filter
            footprint. The caller is expected to pass int32 data (not
            enforced here). Only ``footprint.shape[1]`` is passed to the
            kernel as the kernel size.

    Returns:
        A new array with the same shape and dtype as ``arr2D`` holding the
        kernel's output.
    """
    # Output buffer; the kernel is expected to fill it (cuda.Out copies the
    # device result back into this array after the launch).
    arr2DMaxed = numpy.empty_like(arr2D)

    # Resolve the kernel relative to this file so it is found regardless of
    # the current working directory.
    kernelDir = os.path.dirname(os.path.abspath(__file__))
    kernelPath = os.path.join(kernelDir, "2DSlidingMaxFootprintKernel.c")

    # Use a context manager so the file handle is closed even if compilation
    # of the kernel source raises.
    with open(kernelPath, "rt") as kernelFile:
        kernelModule = SourceModule(kernelFile.read())
    slidingMaxKernel = kernelModule.get_function("slidingMaxiumum2D")

    # To-do: make the block size configurable; it can affect performance
    # depending on the GPU.
    blockSize = [16, 16]
    gridSize = getGridSize(blockSize, arr2D.shape)

    slidingMaxKernel(
        cuda.In(arr2D),                     # Input
        cuda.Out(arr2DMaxed),               # Output
        numpy.int32(footprint.shape[1]),    # Kernel size
        numpy.int32(arr2D.shape[1]),        # Row stride
        numpy.int32(1),                     # Column stride
        numpy.int32(int(arr2D.shape[1])),   # Array column count
        numpy.int32(int(arr2D.shape[0])),   # Array row count
        cuda.In(footprint),
        block=(blockSize[0], blockSize[1], 1),
        grid=(gridSize[0], gridSize[1], 1),
    )
    return arr2DMaxed
示例2: cuda_render
# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import Out [as 别名]
def cuda_render(self, pts, face_set):
    """Rasterize a triangle mesh into a depth image with the CUDA kernel.

    Args:
        pts: (N, 3) array of vertex coordinates; columns 0, 1, 2 are used as
            x, y and depth when projecting with ``self.fx``/``self.fy`` and
            ``self.cx``/``self.cy``.
        face_set: (F, 3) integer array; each row holds the three vertex
            indices of one face.

    Returns:
        Tuple ``(img, mask, bbox)``:
            img: float32 array of shape ``(self.res_y, self.res_x)`` taken
                from the depth buffer, with untouched pixels set to 0.
            mask: boolean array of the same shape built from the kernel's
                ``depth_mask`` output.
            bbox: length-4 integer array read back from the kernel's
                bounding-box buffer.
    """
    pts = pts.astype(np.float32)

    # Project vertices to pixel coordinates, rounded to whole pixels.
    # The builtin ``int`` replaces ``np.int``, which was only an alias for it
    # and has been removed from NumPy (1.24+).
    v = np.round(self.fy * pts[:, 1] / pts[:, 2] + self.cy).astype(int).astype(np.float32)
    u = np.round(self.fx * pts[:, 0] / pts[:, 2] + self.cx).astype(int).astype(np.float32)

    # Depth buffer starts at the sentinel value 100; pixels still equal to
    # 100 after the kernel runs are treated as background below.
    depth_b = gpuarray.zeros((self.res_y * self.res_x), dtype=np.float32) + 100
    # NOTE(review): passed via drv.Out, so these host zeros are not uploaded;
    # this presumably relies on the kernel writing every element - confirm.
    depth_mask = np.zeros((self.res_y * self.res_x), dtype=np.float32)

    # Bounding-box buffer initialised to [9999, 9999, 0, 0].
    bbox = gpuarray.zeros((4), dtype=np.float32)
    bbox[0:2] = np.array([9999, 9999], dtype=np.float32)

    max_idx = np.ones((face_set.shape[0]), dtype=np.float32)
    # One extra block so that every face is covered when the face count is
    # not a multiple of the block size.
    grid_n = face_set.shape[0] // self.n_block + 1

    self.rendering(
        drv.In(v[face_set[:, 0]]), drv.In(v[face_set[:, 1]]), drv.In(v[face_set[:, 2]]),
        drv.In(u[face_set[:, 0]]), drv.In(u[face_set[:, 1]]), drv.In(u[face_set[:, 2]]),
        drv.In(pts[face_set[:, 0], 2]), drv.In(pts[face_set[:, 1], 2]), drv.In(pts[face_set[:, 2], 2]),
        depth_b, drv.In(max_idx), drv.Out(depth_mask), bbox,
        block=(self.n_block, 1, 1), grid=(grid_n, 1, 1))

    img = depth_b.get()
    img[img == 100] = 0  # Clear the sentinel so background depth reads as 0.
    img = np.reshape(img, (self.res_y, self.res_x))
    mask = np.reshape(depth_mask, (self.res_y, self.res_x)).astype(bool)
    bbox_final = bbox.get()
    return img, mask, bbox_final.astype(int)
示例3: test_pycuda_only
# 需要导入模块: from pycuda import driver [as 别名]
# 或者: from pycuda.driver import Out [as 别名]
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works.

    Compiles a small element-wise multiply kernel, runs it on two random
    float32 vectors and checks the result against numpy.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
const int i = threadIdx.x;
dest[i] = a[i] * b[i];
}
""")
    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)

    # The kernel indexes purely by threadIdx.x with no bounds check, so the
    # number of threads must equal the number of elements; launching more
    # threads than elements would access memory past the end of the buffers.
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(a.size, 1, 1), grid=(1, 1))
    assert (dest == a * b).all()