当前位置: 首页>>代码示例>>Python>>正文


Python compiler.SourceModule方法代码示例

本文整理汇总了Python中pycuda.compiler.SourceModule方法的典型用法代码示例。如果您正苦于以下问题:Python compiler.SourceModule方法的具体用法?Python compiler.SourceModule怎么用?Python compiler.SourceModule使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pycuda.compiler的用法示例。


在下文中一共展示了compiler.SourceModule方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: maximum_filter_2d

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def maximum_filter_2d(arr2D, footprint):
    """Run the 2D sliding-maximum CUDA kernel over ``arr2D``.

    The kernel source is read from ``2DSlidingMaxFootprintKernel.c``, which is
    looked up in the same directory as this module.

    Args:
        arr2D: 2D input array. The caller must make sure it is float32.
        footprint: 2D footprint array. The caller must make sure it is int32.

    Returns:
        A new array with the shape and dtype of ``arr2D`` that is handed to
        the kernel as its output buffer.
    """
    arr2DMaxed = numpy.empty_like(arr2D)

    # Resolve the kernel relative to this file so the lookup does not depend
    # on the current working directory.
    kernelDir = os.path.dirname(os.path.abspath(__file__))
    kernelPath = os.path.join(kernelDir, "2DSlidingMaxFootprintKernel.c")

    # Read inside a context manager so the handle is closed even when the
    # compilation below raises.
    with open(kernelPath, "rt") as kernelFile:
        kernelSource = kernelFile.read()
    slidingMaxKernel = SourceModule(kernelSource).get_function("slidingMaxiumum2D")

    blockSize = [16, 16] # To-do: Add a variable to this, can affect performance based on GPU
    gridSize = getGridSize(blockSize, arr2D.shape) # Get the size of our grid based on the size of a grid (blocksize)

    slidingMaxKernel(cuda.In(arr2D),                   # Input
                    cuda.Out(arr2DMaxed),              # Output
                    numpy.int32(footprint.shape[1]),   # Kernel Size
                    numpy.int32(arr2D.shape[1]),       # Row Stride
                    numpy.int32(1),                    # Column Stride
                    numpy.int32(int(arr2D.shape[1])),  # Array Column Count
                    numpy.int32(int(arr2D.shape[0])),  # Array Row Count
                    cuda.In(footprint),
                    block=(blockSize[0],blockSize[1],1),
                    grid=(gridSize[0],gridSize[1],1)
    )

    return arr2DMaxed
开发者ID:CwbhX,项目名称:Jamais-Vu,代码行数:27,代码来源:gpu.py

示例2: test_pycuda_only

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works.

    Compiles a small element-wise multiply kernel, runs it on two random
    float32 vectors and checks the result against the numpy product.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)
    # The kernel indexes by threadIdx.x with no bounds check, so launch
    # exactly one thread per element; more threads than elements would read
    # and write past the end of the buffers.
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(a.size, 1, 1), grid=(1, 1))
    assert (dest == a * b).all()
开发者ID:muhanzhang,项目名称:D-VAE,代码行数:23,代码来源:test_pycuda_theano_simple.py

示例3: test_pycuda_theano

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def test_pycuda_theano():
    """Simple example with pycuda function and Theano CudaNdarray object.

    Compiles a small element-wise multiply kernel, runs it directly on
    CudaNdarray buffers and checks the result against the numpy product.
    """
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)

    # Test with Theano object
    ga = cuda_ndarray.CudaNdarray(a)
    gb = cuda_ndarray.CudaNdarray(b)
    dest = cuda_ndarray.CudaNdarray.zeros(a.shape)
    # The kernel indexes by threadIdx.x with no bounds check, so launch
    # exactly one thread per element; more threads than elements would read
    # and write past the end of the buffers.
    multiply_them(dest, ga, gb,
                  block=(a.size, 1, 1), grid=(1, 1))
    assert (numpy.asarray(dest) == a * b).all()
开发者ID:muhanzhang,项目名称:D-VAE,代码行数:25,代码来源:test_pycuda_theano_simple.py

示例4: make_thunk

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def make_thunk(self, node, storage_map, _, _2):
        """Compile the doubling kernel and return a thunk that runs it.

        The kernel is compiled once, when this method is called. The returned
        callable reads the first input storage cell, (re)allocates the first
        output cell whenever it is empty or has a different shape, and then
        launches ``my_fct`` with 512 threads per block.

        :param node: node whose ``inputs`` / ``outputs`` are looked up in
            ``storage_map``
        :param storage_map: mapping from variables to their storage cells
        :return: a zero-argument callable performing the computation
        """
        threads_per_block = 512
        kernel = SourceModule("""
    __global__ void my_fct(float * i0, float * o0, int size) {
    int i = blockIdx.x*blockDim.x + threadIdx.x;
    if(i<size){
        o0[i] = i0[i]*2;
    }
  }""").get_function("my_fct")
        in_cells = [storage_map[var] for var in node.inputs]
        out_cells = [storage_map[var] for var in node.outputs]

        def thunk():
            src_cell = in_cells[0]
            dst_cell = out_cells[0]
            # Allocate a fresh output buffer when none exists yet or the
            # input shape changed since the previous call.
            stale = dst_cell[0] is None or dst_cell[0].shape != src_cell[0].shape
            if stale:
                dst_cell[0] = cuda.CudaNdarray.zeros(src_cell[0].shape)
            # Round up so every element is covered by some thread.
            n_blocks = int(numpy.ceil(src_cell[0].size / float(threads_per_block)))
            kernel(src_cell[0], dst_cell[0], numpy.intc(src_cell[0].size),
                   block=(threads_per_block, 1, 1), grid=(n_blocks, 1))

        return thunk
开发者ID:muhanzhang,项目名称:D-VAE,代码行数:22,代码来源:pycuda_double_op.py

示例5: __init__

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def __init__(self, num_trans, min_support, use_CUDA, block, thread, use_optimal=True):
	"""Store the mining parameters and, if requested, compile the GPU kernel.

	The element-wise multiply kernel is only compiled when ``use_CUDA`` is
	true and ``use_optimal`` is false; in that case ``block`` and ``thread``
	become the x and y sizes of the CUDA block and the grid is sized so
	that its x extent covers ``num_trans`` elements.

	:param num_trans: number of transactions
	:param min_support: minimum support as a fraction; stored multiplied
		by ``num_trans``
	:param use_CUDA: whether the GPU is used at all
	:param block: x size of the CUDA block (required for the kernel path)
	:param thread: y size of the CUDA block (required for the kernel path)
	:param use_optimal: when true, the hand-written kernel is not compiled
	"""
	self.num_trans = num_trans
	self.min_support = min_support * num_trans
	self.support_list = {}
	self.use_CUDA = use_CUDA
	self.use_optimal = use_optimal
	if self.use_CUDA and not self.use_optimal:
		# Identity comparison: "!= None" goes through __ne__, which can
		# be overridden and is not a reliable None test.
		assert block is not None and thread is not None, \
			"block and thread are required when use_CUDA is set and use_optimal is not"
		mod = SourceModule("""__global__ void multiply_element(int *dest, int *a, int *b) {
								const int idx = threadIdx.x + blockDim.x * blockIdx.x;
								dest[idx] = a[idx] * b[idx];
							   }""")
		self.multiply = mod.get_function("multiply_element")
		self.block = (block, thread, 1)
		# Ceiling division: add one block when there is a remainder.
		dx, mx = divmod(self.num_trans, self.block[0])
		dy, my = divmod(1, self.block[1])
		self.grid = (int(dx + (mx > 0)), int(dy + (my > 0)))
		print("Using Block =", self.block)
		print("Using Grid =", self.grid)
	elif self.use_CUDA:
		print("Accelerating Eclat computation with GPU!")
	else:
		print("Not using GPU for acceleration.")
开发者ID:andi611,项目名称:Apriori-and-Eclat-Frequent-Itemset-Mining,代码行数:25,代码来源:eclat.py

示例6: pack_rows

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def pack_rows():
    """Compile and prepare the ``pack_rows`` CUDA kernel.

    The kernel source is the text returned by ``pack()`` followed by a
    ``pack_rows`` entry point that calls the device-side ``pack`` once per
    row index below ``size``, reading from offset ``i * 32`` of the input.

    Returns:
        The kernel function, already prepared with the ``"2P I"`` signature.
    """
    kernel_source = pack() + r"""
__global__ void pack_rows(float *a, unsigned int *b, int size) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    if (i < size) {
        b[i] = pack(&a[i * 32]);
    }
}
"""

    prepared = SourceModule(kernel_source).get_function("pack_rows")
    prepared.prepare("2P I")
    return prepared
开发者ID:NervanaSystems,项目名称:neon,代码行数:19,代码来源:binary.py

示例7: get_dckernel

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def get_dckernel(slen):
    """Return the prepared decompression kernels for a series of length ``slen``.

    Compiled kernels are cached in ``dckernel_cache`` under the number of
    blocks, so each block count is compiled only once.

    Args:
        slen: length of the series to process.

    Returns:
        The tuple ``(fn1, fn2, freq_tex, amp_tex, phase_tex, nt, nb)``: the
        two prepared kernel functions, the three texture references, the
        threads per block and the block count.

    Raises:
        ValueError: if more than 1024 blocks would be needed.
    """
    # The number of threads per block is fixed for now
    threads_per_block = 1024
    n_blocks = int(numpy.ceil(slen / float(threads_per_block)))

    if n_blocks > 1024:
        raise ValueError("More than 1024 blocks not supported yet")

    try:
        cached = dckernel_cache[n_blocks]
    except KeyError:
        pass
    else:
        return cached

    # Cache miss: render the source for this block count and compile it.
    module = SourceModule(kernel_sources.render(ntpb=threads_per_block, nblocks=n_blocks))
    freq_tex = module.get_texref("freq_tex")
    amp_tex = module.get_texref("amp_tex")
    phase_tex = module.get_texref("phase_tex")

    find_indices = module.get_function("find_block_indices")
    find_indices.prepare("PPifff", texrefs=[freq_tex])
    interpolate = module.get_function("linear_interp")
    interpolate.prepare("PfiffiPP", texrefs=[freq_tex, amp_tex, phase_tex])

    dckernel_cache[n_blocks] = (find_indices, interpolate, freq_tex, amp_tex,
                                phase_tex, threads_per_block, n_blocks)
    return dckernel_cache[n_blocks]
开发者ID:gwastro,项目名称:pycbc,代码行数:23,代码来源:decompress_cuda.py

示例8: div_eigenenergy_cuda

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def div_eigenenergy_cuda(ksn2e, ksn2f, nfermi, vstart, comega, nm2v_re, nm2v_im,
        block_size, grid_size):
    """Compile and launch the ``calc_XXVV_gpu`` kernel.

    The kernel source comes from ``kernel_code_div_eigenenergy_cuda``. The
    array arguments are passed through unchanged; the scalar arguments are
    converted to ``int32`` / ``float64`` before the launch, with ``comega``
    split into its real and imaginary parts.

    :param block_size: indexable with at least two entries (block x, y)
    :param grid_size: indexable with at least two entries (grid x, y)
    """
    launch_block = (int(block_size[0]), int(block_size[1]), 1)
    launch_grid = (int(grid_size[0]), int(grid_size[1]))

    kernel = SourceModule(kernel_code_div_eigenenergy_cuda).get_function("calc_XXVV_gpu")
    kernel_args = (
        nm2v_re,
        nm2v_im,
        np.int32(nm2v_re.shape[0]),
        np.int32(nm2v_re.shape[1]),
        ksn2e,
        ksn2f,
        np.int32(nfermi),
        np.int32(vstart),
        np.int32(ksn2e.shape[0]),
        np.float64(comega.real),
        np.float64(comega.imag),
    )
    kernel(*kernel_args, block=launch_block, grid=launch_grid)
开发者ID:pyscf,项目名称:pyscf,代码行数:14,代码来源:m_div_eigenenergy_cuda.py

示例9: propagate

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def propagate(self, iters=2, rand_search_radius=500):
        """
        Optimize the NNF using PatchMatch Algorithm

        Compiles ``patchmatch.cu`` from ``package_directory`` and launches its
        ``patch_match`` kernel once over a grid of 20x20-thread blocks.
        ``self.nnf`` and ``self.nnd`` are passed as ``drv.InOut`` buffers.

        :param iters: number of iterations
        :param rand_search_radius: max radius to use in random search
        :return: None
        """
        # Read the source inside a context manager so the file handle is
        # closed deterministically, even if compilation fails.
        with open(os.path.join(package_directory, "patchmatch.cu")) as kernel_file:
            kernel_source = kernel_file.read()
        mod = SourceModule(kernel_source, no_extern_c=True)
        patchmatch = mod.get_function("patch_match")

        rows = self.A.shape[0]
        cols = self.A.shape[1]
        channels = np.int32(self.A.shape[2])
        nnf_t = np.zeros(shape=(rows, cols), dtype=np.uint32)
        threads = 20

        def get_blocks_for_dim(dim, blocks):
            # Always one block more than the floor quotient, so the grid
            # covers ``dim`` (with a spare block when it divides evenly).
            return dim // blocks + 1

        patchmatch(
            drv.In(self.A),
            drv.In(self.AA),
            drv.In(self.B),
            drv.In(self.BB),
            drv.InOut(self.nnf),
            drv.InOut(nnf_t),
            drv.InOut(self.nnd),
            np.int32(rows),
            np.int32(cols),
            channels,
            np.int32(self.patch_size),
            np.int32(iters),
            np.int32(8),
            np.int32(rand_search_radius),
            block=(threads, threads, 1),
            grid=(get_blocks_for_dim(rows, threads),
                  get_blocks_for_dim(cols, threads)))
开发者ID:yelantingfeng,项目名称:pyLucid,代码行数:40,代码来源:PatchMatchCuda.py

示例10: make_node

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def make_node(self, *inputs):
        """Build the Apply node and compile the matching element-wise kernel.

        Every input is converted to a contiguous CUDA ndarray variable. The
        C code of ``self.scalar_op`` is wrapped in a ``__global__`` function
        that handles one element per thread, and the compiled function is
        stored on ``self.pycuda_fct``.

        :param inputs: the input variables (exactly two are accepted for now)
        :return: the created Apply node
        :raises TypeError: on a wrong argument count or mismatching ranks
        :raises Exception: if any input has a broadcastable dimension
        """
        gpu_inputs = [gpu_contiguous(as_cuda_ndarray_variable(arg)) for arg in inputs]
        n_given = len(gpu_inputs)
        if self.nin > 0 and n_given != self.nin:
            raise TypeError('Wrong argument count', (self.nin, n_given))
        if any(g.type.ndim != inputs[0].type.ndim for g in gpu_inputs[1:]):
            raise TypeError('different ranks among inputs')

        broadcast_flags = [any(arg.type.broadcastable) for arg in inputs]
        if any(broadcast_flags):
            raise Exception("pycuda don't support broadcasted dimensions")
        assert len(inputs) == 2  # TODO remove

        otype = CudaNdarrayType(broadcastable=[False] * gpu_inputs[0].type.ndim)
        assert self.nout == 1

        fct_name = "pycuda_elemwise_" + str(self.scalar_op)
        out_node = Apply(self, gpu_inputs, [otype() for _unused in xrange(self.nout)])

        # Kernel parameter names: i0, i1, ... for inputs and o0, ... for outputs.
        in_name = ["i%d" % pos for pos in range(len(inputs))]
        out_name = ["o%d" % pos for pos in range(self.nout)]
        indexed_in = tuple(name + "[i]" for name in in_name)
        indexed_out = tuple(name + "[i]" for name in out_name)
        c_code = self.scalar_op.c_code(out_node, "some_name",
                                       indexed_in, indexed_out, {})

        # One typed pointer per input/output, followed by the element count.
        pointer_params = [
            _replace_npy_types(var.type.dtype_specs()[1]) + " *" + name
            for var, name in chain(izip(inputs, in_name),
                                   izip(out_node.outputs, out_name))
        ]
        c_code_param = ", ".join(pointer_params + ["int size"])

        kernel_source = """
  __global__ void %s(%s)
  {
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
    if(i<size){
        %s
    }
  }
  """ % (fct_name, c_code_param, c_code)
        self.pycuda_fct = SourceModule(kernel_source).get_function(fct_name)
        return out_node
开发者ID:muhanzhang,项目名称:D-VAE,代码行数:41,代码来源:pycuda_example.py

示例11: _prepare_compound_kernel

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def _prepare_compound_kernel(transformer, ops):
    """
    Build, compile and prepare a single kernel for a list of ops.

    ops (list): List of tuples describing ops to execute in kernel. Each tuple
        should be of the format (op_name, input0, input1, output, axis)

    Returns a 3-tuple ``(kernel, params, 128)`` where ``params`` starts with
    the grid dimensions, the block dimensions and ``None``, followed by the
    parameters produced by the code generator.
    """
    # Normalize tensor dimensionality before generating code
    wrapped_ops = _wrap_tensor_descriptions(transformer, ops)

    # Kernel source plus the axis -> block/grid assignment
    axes_mapping, dims = _get_axes_mapping(wrapped_ops)
    source, kernel_name, arg_desc, params = _get_compound_kernel(wrapped_ops, axes_mapping, dims)

    # Flex parameters need the extra include block in front of the source
    if _are_flex_params(params):
        prelude = _includes_template + _flex_includes_template
    else:
        prelude = _includes_template

    compiled = SourceModule(prelude + source, options=[])
    kernel = compiled.get_function(kernel_name)
    kernel.name = kernel_name
    kernel.prepare(arg_desc)

    # Each mapped axis fills one slot of the block and grid dimensions;
    # unmapped slots stay at 1.
    blockdim = [1, 1, 1]
    griddim = [1, 1, 1]
    for entry in axes_mapping:
        for slot, label in enumerate(('x', 'y', 'z')):
            if entry[0] == label:
                blockdim[slot] = entry[1]
                griddim[slot] = entry[2]
                break

    launch_params = [tuple(griddim), tuple(blockdim), None] + params
    return (kernel, launch_params, 128)
开发者ID:NervanaSystems,项目名称:ngraph-python,代码行数:43,代码来源:float_ew2.py

示例12: _get_transpose_kernel

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def _get_transpose_kernel(dtype):
    """Compile the ``transpose`` kernel specialised for ``dtype``.

    The register type for ``dtype`` is substituted into the
    ``_transpose_kernel`` template before compilation.

    Returns the kernel function, prepared with the ``"PPII"`` signature.
    """
    register_type = _get_register_type(dtype, memory=True)
    source = _transpose_kernel % {"type": register_type}
    transpose = SourceModule(source).get_function("transpose")
    transpose.prepare("PPII")
    return transpose
开发者ID:NervanaSystems,项目名称:ngraph-python,代码行数:11,代码来源:flex_conv.py

示例13: _get_shuffle_kernel

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def _get_shuffle_kernel(dtype):
    """Compile the ``dimShuffle`` kernel specialised for ``dtype``.

    The register type for ``dtype`` is substituted into the
    ``_shuffle_kernel`` template before compilation.

    Returns the kernel function, prepared with its argument signature.
    """
    register_type = _get_register_type(dtype, memory=True)
    source = _shuffle_kernel % {"type": register_type}
    shuffle = SourceModule(source).get_function("dimShuffle")
    shuffle.prepare("PPIIIIIIIIIIIIII")
    return shuffle
开发者ID:NervanaSystems,项目名称:ngraph-python,代码行数:10,代码来源:flex_conv.py

示例14: compile_cuda_kernel

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def compile_cuda_kernel(cuda_kernel_code):
    """
    Compile CUDA source code and return the compiled module.

    The source is compiled with ``DEFAULT_NVCC_FLAGS`` plus
    ``--use_fast_math``. On a compilation failure the source and the
    compiler's stderr are logged before the error is re-raised.

    :param cuda_kernel_code: CUDA source text to compile
    :return: the compiled ``SourceModule``
    :raises cuda.CompileError: if the compiler rejects the source
    """
    cuda_code = cuda_kernel_code
    # Lazy %-formatting: the message is only built when DEBUG is enabled.
    logging.debug("Compiling cuda code:\n%s", cuda_code)
    try:
        mod = SourceModule(cuda_code, options=DEFAULT_NVCC_FLAGS + ['--use_fast_math'])
    except cuda.CompileError as e:
        logging.error(cuda_code)
        logging.error("CUDA compilation error:")
        logging.error(e.stderr)
        # Bare raise keeps the original traceback intact.
        raise
    return mod
开发者ID:comp-imaging,项目名称:ProxImaL,代码行数:16,代码来源:cuda_codegen.py

示例15: __init__

# 需要导入模块: from pycuda import compiler [as 别名]
# 或者: from pycuda.compiler import SourceModule [as 别名]
def __init__(self, math_function='y = sin(x)', precision='d', lo=0, hi=np.pi, samples_per_thread=10**5, num_blocks=100):
        """Validate the settings and compile the Monte Carlo kernel.

        :param math_function: text substituted into the kernel template
        :param precision: ``None``, 's', 'S', 'single' or ``np.float32`` for
            single precision; 'd', 'D', 'double' or ``np.float64`` for double
        :param lo: lower bound; must be strictly below ``hi``
        :param hi: upper bound
        :param samples_per_thread: stored on the instance as given
        :param num_blocks: stored on the instance as given
        :raises Exception: for an unknown precision or when ``hi - lo <= 0``
        """
        self.math_function = math_function

        single_aliases = (None, 's', 'S', 'single', np.float32)
        double_aliases = ('d', 'D', 'double', np.float64)
        if precision in single_aliases:
            c_type, np_type, curand_suffix = 'float', np.float32, ''
        elif precision in double_aliases:
            c_type, np_type, curand_suffix = 'double', np.float64, '_double'
        else:
            raise Exception('precision is invalid datatype!')
        self.precision = c_type
        self.numpy_precision = np_type
        self.p_curand = curand_suffix

        # Guard clause: the interval must have positive width.
        if hi - lo <= 0:
            raise Exception('hi - lo <= 0!')
        self.hi = hi
        self.lo = lo

        # Fill the kernel template with the chosen type names and function.
        substitutions = {
            'p': self.precision,
            'p_curand': self.p_curand,
            'math_function': self.math_function,
        }
        self.MonteCarloCode = MonteCarloKernelTemplate % substitutions

        self.ker = SourceModule(no_extern_c=True, options=['-w'], source=self.MonteCarloCode)
        self.f = self.ker.get_function('monte_carlo')

        self.num_blocks = num_blocks
        self.samples_per_thread = samples_per_thread
开发者ID:PacktPublishing,项目名称:Hands-On-GPU-Programming-with-Python-and-CUDA,代码行数:34,代码来源:monte_carlo_integrator.py


注:本文中的pycuda.compiler.SourceModule方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。