当前位置: 首页>>代码示例>>Python>>正文


Python OCLArray.zeros方法代码示例

本文整理汇总了 Python 中 gputools.OCLArray.zeros 方法的典型用法代码示例。如果您正苦于以下问题：Python OCLArray.zeros 方法的具体用法？Python OCLArray.zeros 怎么用？Python OCLArray.zeros 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 gputools.OCLArray 的用法示例。


在下文中一共展示了OCLArray.zeros方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setup

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
    def setup(self, size, units, lam=0.5, n0=1.0, use_fresnel_approx=False):
        """
            sets up the internal variables e.g. propagators etc...

            Calls Bpm3d_Base.setup first, then builds the OpenCL program,
            the FFT plan, the device copies of the propagator and weight
            arrays, the zero-initialised length-Nz result buffers and the
            reduction kernel.

            :param size:  the size of the geometry in pixels (Nx,Ny,Nz)
            :param units: the physical units of each voxel in microns (dx,dy,dz)
            :param lam: the wavelength of light in microns
            :param n0:  the refractive index of the surrounding media
            :param use_fresnel_approx:  if True, uses fresnel approximation for propagator

        """
        Bpm3d_Base.setup(self, size, units, lam=lam, n0=n0, use_fresnel_approx=use_fresnel_approx)

        # setting up the gpu buffers and kernels
        self.program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        # All three extents are needed below: (Ny, Nx) for the FFT plan and
        # Nz for the length-Nz buffers. Unpacking only two of them left Nz
        # undefined and raised a NameError.
        # presumably self.size is populated by Bpm3d_Base.setup -- confirm
        Nx, Ny, Nz = self.size[:3]

        # Plan for a single (Ny, Nx) plane, kept on the instance instead of
        # being dropped as an unused local.
        # NOTE(review): confirm `_plan` is the attribute name the rest of
        # the class expects.
        self._plan = fft_plan((Ny, Nx))
        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        self.scatter_weights_g = OCLArray.from_array(self.scatter_weights.astype(np.float32))
        self.gfactor_weights_g = OCLArray.from_array(self.gfactor_weights.astype(np.float32))

        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

        # Sum-reduction of weights[i]*|field[i] - plain*(i==0)|^2; `plain`
        # is only subtracted from the element at index 0.
        self.reduce_kernel = OCLReductionKernel(
            np.float32,
            neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain",
        )
开发者ID:maweigert,项目名称:bpm,代码行数:36,代码来源:bpm_class.py

示例2: nlm3

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
def nlm3(data,sigma, size_filter = 2, size_search = 3):
    """Filter a volume with the OpenCL kernels from kernels/nlm3.cl.

    For a noise level of sigma_0, choose sigma = 1.5*sigma_0.

    For every shift (dx,dy,dz) in the search range the "dist" kernel is
    run, followed by three "convolve" passes, and the result is
    accumulated by "computePlus" (and by "computeMinus" for every
    non-zero shift) into an accumulator and a weight buffer.

    :param data: input array; it is converted to float32 and three shape
        entries are passed to the kernels (presumably a 3d numpy
        array -- confirm against callers)
    :param sigma: filter strength, handed to the kernels as float32
    :param size_filter: value of the FS build-time define of the program
    :param size_search: value of the BS build-time define; also the
        maximal absolute shift along each axis
    :return: the accumulator divided elementwise by the weights
    """

    prog = OCLProgram(abspath("kernels/nlm3.cl"),
                      build_options="-D FS=%i -D BS=%i"%(size_filter,size_search))

    data = data.astype(np.float32, copy = False)
    img = OCLImage.from_array(data)

    # scratch images, each allocated exactly once
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape,np.float32)
    weightBuf = OCLArray.zeros(data.shape,np.float32)

    # kernel arguments that do not change between iterations
    global_size = img.shape
    shape_args = (np.int32(global_size[0]),
                  np.int32(global_size[1]),
                  np.int32(global_size[2]))
    sigma_arg = np.float32(sigma)

    # dx only covers 0..size_search while dy and dz cover the full
    # symmetric range; presumably "computeMinus" handles the mirrored
    # shift -- confirm against nlm3.cl
    for dx in range(size_search+1):
        for dy in range(-size_search,size_search+1):
            for dz in range(-size_search,size_search+1):
                shift_args = (np.int32(dx),np.int32(dy),np.int32(dz))

                prog.run_kernel("dist",global_size,None,
                                img,tmpImg,*shift_args)

                # three passes with flags 1, 2, 4, ping-ponging between the
                # scratch images and ending in distImg
                prog.run_kernel("convolve",global_size,None,
                                tmpImg,tmpImg2,np.int32(1))
                prog.run_kernel("convolve",global_size,None,
                                tmpImg2,tmpImg,np.int32(2))
                prog.run_kernel("convolve",global_size,None,
                                tmpImg,distImg,np.int32(4))

                accum_args = shape_args + shift_args + (sigma_arg,)

                prog.run_kernel("computePlus",global_size,None,
                                img,distImg,accBuf.data,weightBuf.data,
                                *accum_args)

                # the zero shift is accumulated only once
                if dx or dy or dz:
                    prog.run_kernel("computeMinus",global_size,None,
                                    img,distImg,accBuf.data,weightBuf.data,
                                    *accum_args)

    acc  = accBuf.get()
    weights  = weightBuf.get()

    return acc/weights
开发者ID:maweigert,项目名称:gputools,代码行数:56,代码来源:nlm3.py

示例3: _setup_gpu

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
    def _setup_gpu(self):
        """Create the device handles, buffers, FFT plan and kernels.

        Stores the queue and context of the current device, allocates the
        plane-sized buffers and the image, binds the kernels of
        kernels/bpm_3d_kernels.cl to ``self._kernel_*`` attributes, builds
        the two-output sum reduction and finally calls
        ``self._fill_propagator(self.n0)``.
        """
        device = get_device()
        self._queue = device.queue
        self._ctx = device.context
        program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        # the buffers/ images
        Nx, Ny = self.simul_xy
        # not used below; the unpacking still requires self.shape to have
        # at least two entries
        Nx0, Ny0 = self.shape[:2]

        plane_shape = (Ny, Nx)
        stack_shape = (1,) + plane_shape
        real_type = Bpm3d._real_type

        self._plan = fft_plan(plane_shape, **self.fftplan_kwargs)
        self._buf_plane = OCLArray.empty(plane_shape, np.complex64)
        self._buf_H = OCLArray.empty(plane_shape, np.complex64)
        self._img_xy = OCLImage.empty(plane_shape, dtype=np.float32, num_channels=2)

        # buffer for the weighted dn average
        self.intens_g = OCLArray.empty(stack_shape, dtype=real_type)
        self.intens_dn_g = OCLArray.empty(stack_shape, dtype=real_type)
        self.intens_sum_g = OCLArray.zeros((), dtype=real_type)
        self.intens_dn_sum_g = OCLArray.zeros((), dtype=real_type)

        # the propagator kernels get explicit scalar argument dtypes:
        # one untyped argument followed by five float32 scalars
        # (plus two more untyped arguments for the buffer variant)
        five_floats = (np.float32,) * 5
        self._kernel_compute_propagator = program.compute_propagator
        self._kernel_compute_propagator.set_scalar_arg_dtypes((None,) + five_floats)
        self._kernel_compute_propagator_buf = program.compute_propagator_buf
        self._kernel_compute_propagator_buf.set_scalar_arg_dtypes(
            (None,) + five_floats + (None,) * 2)

        # remaining kernels: (attribute on self, kernel name in the program)
        kernel_bindings = (
            ("_kernel_mult_complex", "mult"),
            ("_kernel_im_to_buf_field", "img_to_buf_field"),
            ("_kernel_im_to_buf_intensity", "img_to_buf_intensity"),
            ("_kernel_im_to_im_intensity", "img_to_img_intensity"),
            ("_kernel_buf_to_buf_field", "buf_to_buf_field"),
            ("_kernel_buf_to_buf_intensity", "buf_to_buf_intensity"),
            ("_kernel_mult_dn_img_float", "mult_dn_image"),
            ("_kernel_mult_dn_buf_float", "mult_dn"),
            ("_kernel_mult_dn_img_complex", "mult_dn_image_complex"),
            ("_kernel_mult_dn_buf_complex", "mult_dn_complex"),
            ("_kernel_mult_dn_img_float_local", "mult_dn_image_local"),
            ("_kernel_mult_dn_buf_float_local", "mult_dn_local"),
            ("_kernel_mult_dn_img_complex_local", "mult_dn_image_complex_local"),
            ("_kernel_mult_dn_buf_complex_local", "mult_dn_complex_local"),
        )
        for attr_name, kernel_name in kernel_bindings:
            setattr(self, attr_name, getattr(program, kernel_name))

        # sums the two float buffers a and b in one pass
        self._kernel_reduction = OCLMultiReductionKernel(
            np.float32,
            neutral="0",
            reduce_expr="a+b",
            map_exprs=["a[i]", "b[i]"],
            arguments="__global float *a, __global float *b")

        self._fill_propagator(self.n0)
开发者ID:maweigert,项目名称:biobeam,代码行数:53,代码来源:bpm3d.py

示例4: _setup_impl

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
    def _setup_impl(self):
        """Build the OpenCL program, the FFT plan and the gpu buffers.

        Uploads the propagator ``self._H`` as complex64 and the two weight
        arrays as float32, uploads ``self.dn`` when it is set and there is
        exactly one volume, and creates two zero-filled float32 buffers of
        length Nz.
        """
        self.bpm_program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        Nx, Ny, Nz = self.size

        # the plan covers a single (Ny, Nx) plane
        self._plan = fft_plan((Ny, Nx))

        propagator = self._H.astype(np.complex64)
        self._H_g = OCLArray.from_array(propagator)

        # dn is only uploaded when it exists and a single volume is used
        if self.dn is not None and self.n_volumes == 1:
            self.dn_g = OCLArray.from_array(self.dn)

        # device copies of the host-side weights, stored as <name>_g
        for host_name in ("scatter_weights", "gfactor_weights"):
            host_weights = getattr(self, host_name).astype(np.float32)
            setattr(self, host_name + "_g", OCLArray.from_array(host_weights))

        # zero-initialised result buffers with Nz entries each
        for buffer_name in ("scatter_cross_sec_g", "gfactor_g"):
            setattr(self, buffer_name, OCLArray.zeros(Nz, "float32"))
开发者ID:maweigert,项目名称:bpm,代码行数:24,代码来源:_bpm3d_ocl.py

示例5: _convolve_spatial2

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
def _convolve_spatial2(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      pad_factor = 2,
                      plan = None,
                      return_plan = False):
    """
    spatial varying convolution of an 2d image with a 2d grid of psfs

    shape(im_ = (Ny,Nx)
    shape(hs) = (Gy,Gx, Hy,Hx)

    the input image im is subdivided into (Gy,Gx) blocks
    hs[j,i] is the psf at the center of each block (i,j)

    as of now each image dimension has to be divisible by the grid dim, i.e.
    Nx % Gx == 0
    Ny % Gy == 0


    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition
    """

    if grid_dim:
        Gs = tuple(grid_dim)
    else:
        Gs = hs.shape[:2]


    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ny, Nx = im.shape
    Gy, Gx = Gs


    # the size of each block within the grid
    Nblock_y, Nblock_x = Ny/Gy, Nx/Gx


    # the size of the overlapping patches with safety padding
    Npatch_x, Npatch_y = _next_power_of_2(pad_factor*Nblock_x), _next_power_of_2(pad_factor*Nblock_y)


    prog = OCLProgram(abspath("kernels/conv_spatial2.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan((Npatch_y,Npatch_x))

    x0s = Nblock_x*np.arange(Gx)
    y0s = Nblock_y*np.arange(Gy)


    patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64)

    #prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros((Gy,Gx,Npatch_y,Npatch_x),np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for i,_x0 in enumerate(x0s):
            for j,_y0 in enumerate(y0s):
                prog.run_kernel("fill_psf_grid2",
                                (Nblock_x,Nblock_y),None,
                        tmp_g.data,
                        np.int32(Nx),
                        np.int32(i*Nblock_x),
                        np.int32(j*Nblock_y),
                        h_g.data,
                        np.int32(Npatch_x),
                        np.int32(Npatch_y),
                        np.int32(-Nblock_x/2+Npatch_x/2),
                        np.int32(-Nblock_y/2+Npatch_y/2),
                        np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y)
                            )
    else:
        hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3))
        h_g = OCLArray.from_array(hs.astype(np.complex64))


    #prepare image
    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    for i,_x0 in enumerate(x0s):
        for j,_y0 in enumerate(y0s):
            prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None,
                    im_g,
                    np.int32(_x0+Nblock_x/2-Npatch_x/2),
                    np.int32(_y0+Nblock_y/2-Npatch_y/2),
                    patches_g.data,
                    np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y))


    #return np.abs(patches_g.get())
    # convolution
    fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan)
    fft(h_g,inplace=True, batch = Gx*Gy, plan = plan)
    prog.run_kernel("mult_inplace",(Npatch_x*Npatch_y*Gx*Gy,),None,
#.........这里部分代码省略.........
开发者ID:maweigert,项目名称:gputools,代码行数:103,代码来源:convolve_spatial2.py

示例6: _convolve_spatial3

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
def _convolve_spatial3(im, hs,
                      mode = "constant",
                      grid_dim = None,
                      plan = None,
                      return_plan = False,
                      pad_factor = 2):



    if im.ndim !=3:
        raise ValueError("wrong dimensions of input!")

    if not (hs.ndim==6 or (hs.ndim==3 and grid_dim)):
        raise ValueError("wrong dimensions of psf grid!")

    if grid_dim:
        if hs.shape != im.shape:
            raise ValueError("if grid_dim is set, then im.shape = hs.shape !")
        Gs = tuple(grid_dim)
    else:
        if not hs.ndim==6:
            raise ValueError("wrong dimensions of psf grid! (Gy,Gx,Ny,Nx)")
        Gs = hs.shape[:3]

    if not np.all([n%g==0 for n,g in zip(im.shape,Gs)]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s shape mismatch"%(str(hs.shape[:2])))



    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = im.shape


    # the size of each block within the grid
    Nblocks = [n/g for n,g  in zip(Ns,Gs)]


    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    prog = OCLProgram(abspath("kernels/conv_spatial3.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)


    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    #prepare psfs
    if grid_dim:
        h_g = OCLArray.zeros(Gs+Npatchs,np.complex64)
        tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False))
        for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
            prog.run_kernel("fill_psf_grid3",
                        Nblocks[::-1],None,
                        tmp_g.data,
                        np.int32(im.shape[2]),
                        np.int32(im.shape[1]),
                        np.int32(i*Nblocks[2]),
                        np.int32(j*Nblocks[1]),
                        np.int32(k*Nblocks[0]),
                        h_g.data,
                        np.int32(Npatchs[2]),
                        np.int32(Npatchs[1]),
                        np.int32(Npatchs[0]),
                        np.int32(-Nblocks[2]/2+Npatchs[2]/2),
                        np.int32(-Nblocks[1]/2+Npatchs[1]/2),
                        np.int32(-Nblocks[0]/2+Npatchs[0]/2),
                        np.int32(i*np.prod(Npatchs)+
                         j*Gs[2]*np.prod(Npatchs)+
                         k*Gs[2]*Gs[1]*np.prod(Npatchs)))

    else:
        hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))
        h_g = OCLArray.from_array(hs.astype(np.complex64))


    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    # this loops over all i,j,k
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                im_g,
                    np.int32(_x0+Nblocks[2]/2-Npatchs[2]/2),
                    np.int32(_y0+Nblocks[1]/2-Npatchs[1]/2),
                    np.int32(_z0+Nblocks[0]/2-Npatchs[0]/2),
                    patches_g.data,
                    np.int32(i*np.prod(Npatchs)+
                             j*Gs[2]*np.prod(Npatchs)+
                             k*Gs[2]*Gs[1]*np.prod(Npatchs)))


    # convolution
    fft(patches_g,inplace=True, batch = np.prod(Gs), plan = plan)
    fft(h_g,inplace=True, batch = np.prod(Gs), plan = plan)
#.........这里部分代码省略.........
开发者ID:maweigert,项目名称:gputools,代码行数:103,代码来源:convolve_spatial3.py

示例7: _bpm_3d_image

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]
def _bpm_3d_image(size,
            units,
            lam = .5,
            u0 = None, dn = None,
            subsample = 1,
            n0 = 1.,
            return_scattering = False,
            return_g = False,
            return_full_last = False,
            use_fresnel_approx = False,
            ):
    """
    simulates the propagation of monochromativ wave of wavelength lam with initial conditions u0 along z in a media filled with dn

    size     -    the dimension of the image to be calulcated  in pixels (Nx,Ny,Nz)
    units    -    the unit lengths of each dimensions in microns
    lam      -    the wavelength
    u0       -    the initial field distribution, if u0 = None an incident  plane wave is assumed
    dn       -    the refractive index of the medium (can be complex)

    """
    clock = StopWatch()

    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # subsampling
    Nx2, Ny2, Nz2 = (subsample*N for N in size)
    dx2, dy2, dz2 = (1.*d/subsample for d in units)

    #setting up the propagator
    k0 = 2.*np.pi/lam

    kxs = 2.*np.pi*np.fft.fftfreq(Nx2,dx2)
    kys = 2.*np.pi*np.fft.fftfreq(Ny2,dy2)

    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    #H0 = np.sqrt(0.j+n0**2*k0**2-KX**2-KY**2)
    H0 = np.sqrt(n0**2*k0**2-KX**2-KY**2)

    if use_fresnel_approx:
        H0  = 0.j+n0**2*k0-.5*(KX**2+KY**2)


    outsideInds = np.isnan(H0)

    H = np.exp(-1.j*dz2*H0)

    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    if u0 is None:
        u0 = np.ones((Ny2,Nx2),np.complex64)
    else:
        if subsample >1:
            u0 = zoom(np.real(u0),subsample) + 1.j*zoom(np.imag(u0),subsample)

    # setting up the gpu buffers and kernels

    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = fft_plan((Ny2,Nx2))
    plane_g = OCLArray.from_array(u0.astype(np.complex64))

    h_g = OCLArray.from_array(H.astype(np.complex64))

    if dn is not None:
        if isinstance(dn,OCLImage):
            dn_g = dn
        else:
            if dn.dtype.type in (np.complex64,np.complex128):

                dn_complex = np.zeros(dn.shape+(2,),np.float32)
                dn_complex[...,0] = np.real(dn)
                dn_complex[...,1] = np.imag(dn)
                dn_g = OCLImage.from_array(dn_complex)

            else:
                dn_g = OCLImage.from_array(dn.astype(np.float32))

        isComplexDn = dn.dtype.type in (np.complex64,np.complex128)

    else:
        #dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float16)


    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # = cos(theta)
        scatter_weights = cos_theta

        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2
#.........这里部分代码省略.........
开发者ID:maweigert,项目名称:bpm,代码行数:103,代码来源:bpm_3d.py

示例8: _bpm_3d2

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]

#.........这里部分代码省略.........
                if store_dn_as_half:
                    dn_g = OCLArray.from_array(dn.astype(np.float16,copy= False))
                else:
                    dn_g = OCLArray.from_array(dn.astype(np.float32,copy= False))

    else:
        #dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float32)


    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # _H = np.sqrt(n0**2*k0**2-KX**2-KY**2)
        # _H[np.isnan(_H)] = 0.
        #
        # cos_theta = _H/n0/k0
        # # = cos(theta)
        scatter_weights = cos_theta

        #scatter_weights = np.sqrt(KX**2+KY**2)/k0/np.real(H0)
        #scatter_weights[outsideInds] = 0.

        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2

        gfactor_weights_g = OCLArray.from_array(gfactor_weights.astype(np.float32))


        #return None,None,scatter_weights, gfactor_weights

        scatter_cross_sec_g = OCLArray.zeros(Nz,"float32")
        gfactor_g = OCLArray.zeros(Nz,"float32")

        plain_wave_dct = Nx*Ny*np.exp(-1.j*k0*n0*(scattering_plane_ind+np.arange(Nz))*dz).astype(np.complex64)


        reduce_kernel = OCLReductionKernel(
        np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

        # reduce_kernel = OCLReductionKernel(
        # np.float32, neutral="0",
        #     reduce_expr="a+b",
        #     map_expr = "weights[i]*(i!=0)*cfloat_abs(field[i])*cfloat_abs(field[i])",
        #     arguments = "__global cfloat_t *field, __global float * weights,cfloat_t plain")

    if return_full:
        if return_field:
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.complex64)
            u_g[0] = plane_g
        else:
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.float32)
            program.run_kernel("copy_intens",(Nx*Ny,),None,
                           plane_g.data,u_g.data, np.int32(0))


    clock.toc("setup")

    clock.tic("run")

开发者ID:maweigert,项目名称:bpm,代码行数:68,代码来源:bpm_3d.py

示例9: convolve_spatial3

# 需要导入模块: from gputools import OCLArray [as 别名]
# 或者: from gputools.OCLArray import zeros [as 别名]

#.........这里部分代码省略.........
    mode can be:
    "constant" - assumed values to be zero
    "wrap" - periodic boundary condition


    """
    if im.ndim !=3 or hs.ndim !=6:
        raise ValueError("wrong dimensions of input!")

    if not np.all([n%g==0 for n,g in zip(im.shape,hs.shape[:3])]):
        raise NotImplementedError("shape of image has to be divisible by Gx Gy  = %s !"%(str(hs.shape[:3])))


    mode_str = {"constant":"CLK_ADDRESS_CLAMP",
                "wrap":"CLK_ADDRESS_REPEAT"}

    Ns = tuple(im.shape)
    Gs = tuple(hs.shape[:3])


    # the size of each block within the grid
    Nblocks = [n/g for n,g  in zip(Ns,Gs)]


    # the size of the overlapping patches with safety padding
    Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks])

    print hs.shape
    hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5))



    prog = OCLProgram(abspath("kernels/conv_spatial.cl"),
                      build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]])

    if plan is None:
        plan = fft_plan(Npatchs)

    patches_g = OCLArray.empty(Gs+Npatchs,np.complex64)

    h_g = OCLArray.from_array(hs.astype(np.complex64))

    im_g = OCLImage.from_array(im.astype(np.float32,copy=False))

    Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)]




    print Nblocks
    # this loops over all i,j,k
    for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]):
        prog.run_kernel("fill_patch3",Npatchs[::-1],None,
                im_g,
                    np.int32(_x0+Nblocks[2]/2-Npatchs[2]/2),
                    np.int32(_y0+Nblocks[1]/2-Npatchs[1]/2),
                    np.int32(_z0+Nblocks[0]/2-Npatchs[0]/2),
                    patches_g.data,
                    np.int32(i*np.prod(Npatchs)+
                             j*Gs[2]*np.prod(Npatchs)+
                             k*Gs[2]*Gs[1]*np.prod(Npatchs)))



    print patches_g.shape, h_g.shape




    # convolution
    fft(patches_g,inplace=True, batch = np.prod(Gs), plan = plan)
    fft(h_g,inplace=True, batch = np.prod(Gs), plan = plan)
    prog.run_kernel("mult_inplace",(np.prod(Npatchs)*np.prod(Gs),),None,
                    patches_g.data, h_g.data)

    fft(patches_g,
        inplace=True,
        inverse = True,
        batch = np.prod(Gs),
        plan = plan)

    #return patches_g.get()
    #accumulate
    res_g = OCLArray.zeros(im.shape,np.float32)

    for k, j, i in product(*[range(g+1) for g in Gs]):
        prog.run_kernel("interpolate3",Nblocks[::-1],None,
                        patches_g.data,
                        res_g.data,
                        np.int32(i),np.int32(j),np.int32(k),
                        np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]),
                        np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0]))


    res = res_g.get()

    if return_plan:
        return res, plan
    else:
        return res
开发者ID:maweigert,项目名称:gputools,代码行数:104,代码来源:convolve_spatial_old.py


注:本文中的gputools.OCLArray.zeros方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。