

Python gpuarray.to_gpu Function Code Examples

This article collects typical usage examples of the Python function pycuda.gpuarray.to_gpu. If you are trying to work out what to_gpu does, how to call it, or what real-world usage looks like, the curated code examples below should help.


The following presents 15 code examples of the to_gpu function, taken from open-source projects and ordered by popularity by default.
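Before the examples, here is a minimal sketch of the round-trip pattern most of them build on: to_gpu copies a NumPy array into device memory and returns a GPUArray, and get() copies the result back to the host. The array contents below are purely illustrative.

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
from pycuda import gpuarray

# Host -> device copy
a_host = np.random.rand(4, 4).astype(np.float32)
a_gpu = gpuarray.to_gpu(a_host)

# Elementwise arithmetic runs on the GPU
b_gpu = 2 * a_gpu + 1

# Device -> host copy
b_host = b_gpu.get()
assert np.allclose(b_host, 2 * a_host + 1)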

Example 1: main

def main(dtype):
    from pycuda.elementwise import get_linear_combination_kernel
    lc_kernel, lc_texrefs = get_linear_combination_kernel((
        (True, dtype, dtype),
        (True, dtype, dtype)
        ), dtype)

    for size_exp in range(10, 26):
        size = 1 << size_exp

        from pycuda.curandom import rand
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)

        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()

        for i in range(20):
            a.bind_to_texref_ext(lc_texrefs[0], allow_double_hack=True)
            b.bind_to_texref_ext(lc_texrefs[1], allow_double_hack=True)
            lc_kernel.prepared_call(x._grid, x._block,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size)

        stop.record()
        stop.synchronize()

        print(size, size_exp, stop.time_since(start))
Developer: sluo1989 | Project: computing | Lines of code: 32 | Source file: GpuScalarMult.py

Example 2: calculate_circuit_graph_vertex_data_device

def calculate_circuit_graph_vertex_data_device(d_D, d_C, length):
    logger = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_vertex_data_device')
    logger.info("started.")
    mod = SourceModule("""
    __global__ void calculateCircuitGraphVertexData( unsigned int * D,unsigned int * C,unsigned int ecount){

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <ecount)
        {
            unsigned int c=D[tid];
            atomicExch(C+c,1);
        }
    }
    """)
    calculate_circuit_graph_vertex_data = mod.get_function('calculateCircuitGraphVertexData')
    block_dim, grid_dim = getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_C = gpuarray.to_gpu(d_C)
    calculate_circuit_graph_vertex_data(
        np_d_D,
        np_d_C,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    np_d_D.get(d_D)
    np_d_C.get(d_C)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    logger.info("Finished. Leaving.")
    return d_D, d_C
Developer: zenlc2000 | Project: pycuda-euler | Lines of code: 32 | Source file: pyeulertour.py

Example 3: test_cublasDgemmBatched

    def test_cublasDgemmBatched(self):
        l, m, k, n = 11, 7, 5, 3
        A = np.random.rand(l, m, k).astype(np.float64)
        B = np.random.rand(l, k, n).astype(np.float64)

        C_res = np.einsum('nij,njk->nik',A,B)

        a_gpu = gpuarray.to_gpu(A)
        b_gpu = gpuarray.to_gpu(B)
        c_gpu = gpuarray.empty((l, m, n), np.float64)

        alpha = np.float64(1.0)
        beta = np.float64(0.0)

        a_arr = bptrs(a_gpu)
        b_arr = bptrs(b_gpu)
        c_arr = bptrs(c_gpu)

        cublas.cublasDgemmBatched(self.cublas_handle, 'n','n',
                                  n, m, k, alpha,
                                  b_arr.gpudata, n,
                                  a_arr.gpudata, k,
                                  beta, c_arr.gpudata, n, l)

        assert np.allclose(C_res, c_gpu.get())
Developer: Brainiarc7 | Project: scikit-cuda | Lines of code: 25 | Source file: test_cublas.py
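Example 3 relies on a small helper, bptrs, that is not shown in the excerpt: cublasDgemmBatched expects an array of device pointers, one per matrix in the batch. A sketch of such a helper is shown below; the exact implementation in the project may differ.

import numpy as np
from pycuda import gpuarray

def bptrs(a):
    # Build a GPU array holding the device pointer of each matrix in the
    # batched array `a` (shape (batch, rows, cols)), spaced one matrix apart.
    return gpuarray.arange(a.ptr,
                           a.ptr + a.shape[0] * a.strides[0],
                           a.strides[0],
                           dtype=np.uintp)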

Example 4: gpu_sweep_col_mult

def gpu_sweep_col_mult(X, y):
    """ X * y = X across the columns """
    if type(X)==GPUArray:
        gX = X
    else:
        gX = to_gpu(np.asarray(X, dtype=np.float32))

    if type(y)==GPUArray:
        gy = y
    else:
        gy = to_gpu(np.asarray(y, dtype=np.float32))

    dims = np.asarray(X.shape, dtype=np.int32)
    if devinfo.max_block_threads >= 1024:
        blocksize = 32
    else:
        blocksize = 16

    gridsize = int(dims[0] / blocksize) + 1
    shared = 4*blocksize

    if gX.flags.c_contiguous:
        func = CUDA_Kernels.get_function("sweep_columns_mult")
    else:
        func = CUDA_Kernels.get_function("sweep_columns_mult_cm")

    func(gX, gy, dims[0], dims[1], block=(blocksize, blocksize,1),
         grid = (gridsize,1), shared = shared)

    if type(y)!=GPUArray:
        X = gX.get()
Developer: brodyh | Project: dpmix | Lines of code: 31 | Source file: cuda_functions.py

Example 5: cache_z

    def cache_z(self, z):
        x = np.require(z.real, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
        y = np.require(z.imag, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
        xd = gpuarray.to_gpu(x)
        yd = gpuarray.to_gpu(y)
        cuda.memcpy_dtod(self.xd, xd.ptr, xd.nbytes)
        cuda.memcpy_dtod(self.yd, yd.ptr, yd.nbytes)
Developer: abelfunctions | Project: abelfunctions | Lines of code: 7 | Source file: riemanntheta_omegas.py

Example 6: _init_weights

  def _init_weights(self, weight_shape, bias_shape):
    if self.weight is None:
      if self.name == 'noise':
        assert(weight_shape[0] == weight_shape[1])
        self.weight = gpuarray.to_gpu(np.eye(weight_shape[0], dtype = np.float32))
      else:
        self.weight = gpuarray.to_gpu(randn(weight_shape, np.float32) * self.initW)

    if self.bias is None:
      if self.initB > 0.0:
        self.bias = gpuarray.to_gpu((np.ones(bias_shape, dtype=np.float32) * self.initB))
      else:
        self.bias = gpuarray.zeros(bias_shape, dtype=np.float32)

    Assert.eq(self.weight.shape, weight_shape) 
    Assert.eq(self.bias.shape, bias_shape) 
    
    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)
    
    if self.momW > 0.0:
      if self.weightIncr is None:
        self.weightIncr = gpuarray.zeros_like(self.weight)
      if self.biasIncr is None:
        self.biasIncr = gpuarray.zeros_like(self.bias)
      
      Assert.eq(self.weightIncr.shape, weight_shape) 
      Assert.eq(self.biasIncr.shape, bias_shape)
Developer: tesatory | Project: fastnet-noisy | Lines of code: 28 | Source file: layer.py

Example 7: gpu_sweep_row_div

def gpu_sweep_row_div(X, y):
    """ X / y = X down the rows """
    if type(X)==GPUArray:
        gX = X
    else:
        gX = to_gpu(np.asarray(X, dtype=np.float32))

    if type(y)==GPUArray:
        gy = y
    else:
        gy = to_gpu(np.asarray(y, dtype=np.float32))

    dims = np.asarray(X.shape, dtype=np.int32)
    if devinfo.max_block_threads >= 1024:
        blocksize = 32
    else:
        blocksize = 16

    gridsize = int(dims[0] / blocksize) + 1
    shared = int(4*dims[1])

    if gX.flags.c_contiguous:
        func = CUDA_Kernels.get_function("sweep_rows_div")
    else:
        func = CUDA_Kernels.get_function("sweep_rows_div_cm")

    func(gX, gy, dims[0], dims[1], block=(blocksize, blocksize,1),
         grid = (gridsize,1), shared = shared)

    if type(y)!=GPUArray:
        X = gX.get()
Developer: brodyh | Project: dpmix | Lines of code: 31 | Source file: cuda_functions.py

Example 8: cuda_dot3

def cuda_dot3(A, b):
    print("cuda_dot3", A.shape, b.shape)
    # send b to GPU
    b_gpu = gpuarray.to_gpu(b)
    # transpose b on GPU
    bt_gpu = linalg.transpose(b_gpu)
    #remove b for now
    b_gpu.gpudata.free()
    del(b_gpu)
    # send A to GPU    
    A_gpu = gpuarray.to_gpu(A)
    
    temp_gpu = linalg.dot(bt_gpu, A_gpu)
    
    bt_gpu.gpudata.free()
    del(bt_gpu)
    A_gpu.gpudata.free()
    del(A_gpu)
    
    # send b to GPU
    b_gpu = gpuarray.to_gpu(b)
    
    c_gpu = linalg.dot(temp_gpu, b_gpu)
    
    temp_gpu.gpudata.free()
    del(temp_gpu)
    b_gpu.gpudata.free()
    del(b_gpu)
        
    #theoretically possible to move into RAM, force cleanup on GPU and then return from RAM
    #but most likely not necessary
    return c_gpu.get()
Developer: sneshyba | Project: ice3 | Lines of code: 32 | Source file: facetbrightnessstuff3.py

Example 9: test_set_by_inds_from_inds

    def test_set_by_inds_from_inds(self):
        dest_gpu = gpuarray.to_gpu(np.zeros(5, dtype=np.float32))
        ind_dest = gpuarray.to_gpu(np.array([0, 2, 4]))
        src_gpu = gpuarray.to_gpu(np.arange(5, 10, dtype=np.float32))
        ind_src = gpuarray.to_gpu(np.array([2, 3, 4]))
        gpu.set_by_inds_from_inds(dest_gpu, ind_dest, src_gpu, ind_src)
        assert np.allclose(dest_gpu.get(), np.array([7, 0, 8, 0, 9], dtype=np.float32))
Developer: NeuralSci | Project: neurokernel | Lines of code: 7 | Source file: test_gpu.py

Example 10: set_by_inds

    def set_by_inds(self, inds, data):
        """
        Set mapped data by integer indices.

        Parameters
        ----------
        inds : sequence of int
            Integer indices of data elements to update.
        data : numpy.ndarray
            Data to assign.
        """

        assert len(np.shape(inds)) == 1
        assert issubclass(inds.dtype.type, numbers.Integral)
        N = len(inds)
        assert N == len(data)

        if not isinstance(inds, gpuarray.GPUArray):
            inds = gpuarray.to_gpu(inds)
        if not isinstance(data, gpuarray.GPUArray):
            data = gpuarray.to_gpu(data)

        # Allocate data array if it doesn't exist:
        if not self.data:
            self.data = gpuarray.empty(N, data.dtype)
        else:
            assert self.data.dtype == data.dtype
        try:
            func = self.set_by_inds.cache[inds.dtype]
        except KeyError:
            inds_ctype = tools.dtype_to_ctype(inds.dtype)
            v = "{data_ctype} *dest, {inds_ctype} *inds, {data_ctype} *src".format(data_ctype=self.data_ctype, inds_ctype=inds_ctype)        
            func = elementwise.ElementwiseKernel(v, "dest[inds[i]] = src[i]")
            self.set_by_inds.cache[inds.dtype] = func
        func(self.data, inds, data, range=slice(0, N, 1))
Developer: MariyaS | Project: neurokernel | Lines of code: 35 | Source file: pm_gpu.py

Example 11: main

def main():
    import numpy as np
    import pycuda.autoinit
    from pycuda import gpuarray
    from skdata import toy
    from hebel import memory_pool
    from hebel.data_providers import BatchDataProvider
    from hebel.models import NeuralNetRegression
    from hebel.optimizers import SGD
    from hebel.parameter_updaters import SimpleSGDUpdate
    from hebel.monitors import SimpleProgressMonitor
    from hebel.schedulers import exponential_scheduler

    # Get data
    data_cpu, targets_cpu = toy.Boston().regression_task()
    data = gpuarray.to_gpu(data_cpu.astype(np.float32), allocator=memory_pool.allocate)
    targets = gpuarray.to_gpu(targets_cpu.astype(np.float32), allocator=memory_pool.allocate)
    data_provider = BatchDataProvider(data, targets)

    # Create model object
    model = NeuralNetRegression(n_in=data_cpu.shape[1], n_out=targets_cpu.shape[1],
                                layers=[100], activation_function='relu')
    
    # Create optimizer object
    optimizer = SGD(model, SimpleSGDUpdate, data_provider, data_provider,
                    learning_rate_schedule=exponential_scheduler(.1, .9999),
                    early_stopping=True)
    optimizer.run(3000)
Developer: DavidDJChen | Project: hebel | Lines of code: 28 | Source file: neural_net_regression_example.py

Example 12: cuda_ageSols

def cuda_ageSols(sols):
    """ makes solutions to age """

    #get num sols
    num_sols = len(sols);
    
    
    
    #convert to form of numpy arrays
    sols_arr = numpy.array(sols, numpy.float32);
    ones_arr = numpy.zeros_like(sols,numpy.float32);
    ones_arr[:,constants.AGE_GENE] = 1;
    
    #copy each to gpu
    sols_gpu = gpuarray.to_gpu(sols_arr);
    mask_gpu = gpuarray.to_gpu(ones_arr);
    
    #debug
    if debug == True:
        print(mask_gpu.view());
    
    #apply mask
    aged_sols_gpu = sols_gpu + mask_gpu;
    
    sols = aged_sols_gpu.get().tolist();
Developer: adamuas | Project: coevondm | Lines of code: 25 | Source file: cudaInterface.py

Example 13: _initialize_gpu_ds

    def _initialize_gpu_ds(self):
        """
        Setup GPU arrays.
        """

        self.synapse_state = garray.zeros(int(self.total_synapses) + \
                                    len(self.input_neuron_list), np.float64)
        if self.my_num_gpot_neurons>0:
            self.V = garray.zeros(int(self.my_num_gpot_neurons), np.float64)
        else:
            self.V = None

        if self.my_num_spike_neurons>0:
            self.spike_state = garray.zeros(int(self.my_num_spike_neurons), np.int32)

        if len(self.public_gpot_list)>0:
            self.public_gpot_list_g = garray.to_gpu(self.public_gpot_list)
            self.projection_gpot = garray.zeros(len(self.public_gpot_list), np.double)
            self._extract_gpot = self._extract_projection_gpot_func()

        if len(self.public_spike_list)>0:
            self.public_spike_list_g = garray.to_gpu( \
                (self.public_spike_list-self.spike_shift).astype(np.int32))
            self.projection_spike = garray.zeros(len(self.public_spike_list), np.int32)
            self._extract_spike = self._extract_projection_spike_func()
Developer: LuisMoralesAlonso | Project: neurokernel | Lines of code: 25 | Source file: LPU.py

Example 14: main_no_tex

def main_no_tex(dtype):
    lc_kernel = get_lin_comb_kernel_no_tex((
        (True, dtype, dtype),
        (True, dtype, dtype)
        ), dtype)

    for size_exp in range(10,26):
        size = 1 << size_exp

        from pycuda.curandom import rand
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)

        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()

        for i in range(20):
            lc_kernel.prepared_call(x._grid, x._block,
                a.gpudata, x.gpudata,
                b.gpudata, y.gpudata,
                z.gpudata, x.mem_size)

        stop.record()
        stop.synchronize()

        print(size, size_exp, stop.time_since(start))
Developer: sluo1989 | Project: computing | Lines of code: 31 | Source file: GpuScalarMult.py

Example 15: test_neural_net_regression

    def test_neural_net_regression(self):
        for _ in range(20):
            N = 10000    # Number of data points
            D = 100      # Dimensionality of exogenous data
            P = 50       # Dimensionality of endogenous data

            W_true = 10 * np.random.rand(D, P) - 5
            b_true = 100 * np.random.rand(P) - 50

            X = np.random.randn(N, D)
            Y = np.dot(X, W_true) + b_true[np.newaxis, :] + np.random.randn(N, P)        

            W_lstsq = np.linalg.lstsq(np.c_[np.ones((N, 1)), X], Y)[0]
            b_lstsq = W_lstsq[0]
            W_lstsq = W_lstsq[1:]

            data_provider = BatchDataProvider(gpuarray.to_gpu(X.astype(np.float32),
                                                              allocator=memory_pool.allocate),
                                              gpuarray.to_gpu(Y.astype(np.float32),
                                                              allocator=memory_pool.allocate))

            model = NeuralNetRegression([], n_in=D, n_out=P)
            optimizer = SGD(model, SimpleSGDUpdate, 
                            data_provider, data_provider,
                            learning_rate_schedule=constant_scheduler(10.),
                            early_stopping=True)
            optimizer.run(100)

            self.assertLess(np.abs(W_lstsq - model.top_layer.W.get()).max(),
                            1e-5)
Developer: amit2014 | Project: hebel | Lines of code: 30 | Source file: hebel_test.py


Note: The pycuda.gpuarray.to_gpu examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors. Before distributing or using any of the code, please consult the license of the corresponding project. Do not reproduce this article without permission.