

Python gpuarray.empty_like function code examples

This article collects typical usage examples of the pycuda.gpuarray.empty_like function in Python. If you are wondering what empty_like does, how to call it, or what real-world uses look like, the curated code examples below should help.


The sections below show 15 code examples of the empty_like function, sorted by popularity by default.
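
As a quick orientation before the longer examples, here is a minimal, self-contained sketch of the typical empty_like pattern, assuming a working PyCUDA installation: allocate an uninitialized GPU array with the same shape and dtype as an existing one, then fill it from a kernel. The doubling kernel and variable names are illustrative only and are not taken from any of the projects cited below.

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.gpuarray as gpuarray
from pycuda.elementwise import ElementwiseKernel

# Source data on the GPU.
x_gpu = gpuarray.to_gpu(np.random.rand(4, 4).astype(np.float32))

# empty_like allocates an uninitialized array with the same shape,
# dtype and layout as x_gpu; its contents are garbage until written.
y_gpu = gpuarray.empty_like(x_gpu)

# Illustrative element-wise kernel that writes y = 2 * x into the new buffer.
double_elements = ElementwiseKernel(
    "float *y, float *x",
    "y[i] = 2.0f * x[i]",
    "double_elements")
double_elements(y_gpu, x_gpu)

print(np.allclose(y_gpu.get(), 2 * x_gpu.get()))  # True

Compared with gpuarray.zeros_like, empty_like skips the device-side fill, which is why it appears in the examples below wherever every element is about to be overwritten anyway.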

Example 1: calc_x_G

def calc_x_G(Kp1, C, Cm1, rp1, lm2, Am1, A, Ap1, lm1_s, lm1_si, r_s, r_si, Vsh, handle=None):
    D = A[0].shape[1]
    Dm1 = A[0].shape[0]
    q = len(A)
    
    x = garr.zeros((Dm1, q * D - Dm1), dtype=A[0].dtype)
    x_part = garr.empty_like(x)
    x_subpart = garr.empty_like(A[0])
    
    if not (C is None and Kp1 is None):
        assert C is not None and Kp1 is not None
        x_part.fill(0)
        for s in range(q):
            x_subpart = eps_r(rp1, C[s], Ap1, x_subpart, handle) #~1st line
            
            x_subpart += cla.dot(A[s], Kp1, handle=handle) #~3rd line
    
            x_part += cla.dot(cla.dot(x_subpart, r_si, handle=handle), Vsh[s], handle=handle)

        x += cla.dot(lm1_s, x_part, handle=handle)

    if lm2 is not None:
        x_part.fill(0)
        for s in range(q):     #~2nd line
            x_subpart = eps_l(lm2, Am1, Cm1[s], x_subpart, handle)
            x_part += cla.dot(x_subpart, cla.dot(r_s, Vsh[s], handle=handle), handle=handle)
        x += cla.dot(lm1_si, x_part, handle=handle)
        
    return x
Developer: amilsted, Project: evoMPS, Lines: 29, Source: cuda_alternatives.py

Example 2: gpubarlinedata

def gpubarlinedata(xdata, ydata, bins, minval=None, maxval=None):
    if maxval is None:
        maxval = gpumax(xdata)
    if minval is None:
        minval = gpumin(xdata)
    binsize = (maxval - minval) / float(bins)
    inbin = gpuarray.empty_like(xdata)
    select = gpuarray.empty_like(xdata)
    xmeans = []
    ymeans = []
    errors = []
    for i in xrange(bins):
        lo = minval + binsize * i
        hi = minval + binsize * (i + 1)
        gpubarlinekerna(xdata, lo, hi, inbin)
        N = gpusum(inbin)
        if N > 1:
            gpubarlinekernb(inbin, ydata, select)
            my = gpusum(select) / float(N)
            gpubarlinekernb(inbin, xdata, select)
            mx = gpusum(select) / float(N)
            gpubarlinekernc(inbin, ydata, my, select)
            s = sqrt(gpusum(select) / (N * (N - 1)))
            xmeans.append(mx)
            ymeans.append(my)
            errors.append(s)
    return (xmeans, ymeans, errors)
Developer: michaelerule, Project: neurotools, Lines: 27, Source: statistics.py

Example 3: integrate

def integrate(stepsize=0.01, stores=5, steps=10000, number_of_particles=2 ** 10):
    gpu_r, gpu_v, gpu_mass = create_particles(number_of_particles)
    number_of_particles = np.int32(number_of_particles)
    gpu_rs, gpu_vs = [gpu_r], [gpu_v]

    for i in xrange(stores - 1):
        gpu_rs.append(gpuarray.empty_like(gpu_r))
        gpu_vs.append(gpuarray.empty_like(gpu_v))

    advance = SourceModule(advance_kernel).get_function("advance")
    advance.prepare([np.intp, np.intp, np.intp, np.intp, np.intp, np.int32])

    # CUDA launch dimensions must all be at least 1.
    block_size = (32, 1, 1)
    grid_size = (int(number_of_particles // 32), 1)

    # prepared_call expects the grid first, then the block, then the kernel
    # arguments; GPUArray arguments are passed via their device pointers.
    advance.prepared_call(grid_size, block_size,
                          gpu_rs[0].gpudata, gpu_vs[0].gpudata, gpu_mass.gpudata,
                          gpu_rs[1].gpudata, gpu_vs[1].gpudata, number_of_particles)

    old, new = 1, 2
    for i in xrange(steps):
        r = gpu_rs[old].get_async()
        v = gpu_vs[old].get_async()
        advance.prepared_async_call(
            grid_size, block_size, None,
            gpu_rs[old].gpudata, gpu_vs[old].gpudata, gpu_mass.gpudata,
            gpu_rs[new].gpudata, gpu_vs[new].gpudata, number_of_particles
        )

        # Write out the positions and velocities fetched from the previous step.
        r.tofile("step{0:4}_r".format(i * stepsize) + ".dat")
        v.tofile("step{0:4}_v".format(i * stepsize) + ".dat")

        old, new = new, (new + 1) % stores
Developer: tricecold, Project: pynbody, Lines: 29, Source: nbody.py

Example 4: sici

def sici(x_gpu):
    """
    Sine/Cosine integral.

    Computes the sine and cosine integral of every element in the
    input matrix.

    Parameters
    ----------
    x_gpu : GPUArray
        Input matrix of shape `(m, n)`.
        
    Returns
    -------
    (si_gpu, ci_gpu) : tuple of GPUArrays
        Tuple of GPUarrays containing the sine integrals and cosine
        integrals of the entries of `x_gpu`.
        
    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> x = np.array([[1, 2], [3, 4]], np.float32)
    >>> x_gpu = gpuarray.to_gpu(x)
    >>> (si_gpu, ci_gpu) = sici(x_gpu)
    >>> (si, ci) = scipy.special.sici(x)
    >>> np.allclose(si, si_gpu.get())
    True
    >>> np.allclose(ci, ci_gpu.get())
    True
    """

    if x_gpu.dtype == np.float32:
        args = 'float *x, float *si, float *ci'
        op = 'sicif(x[i], &si[i], &ci[i])'
    elif x_gpu.dtype == np.float64:
        args = 'double *x, double *si, double *ci'
        op = 'sici(x[i], &si[i], &ci[i])'
    else:
        raise ValueError('unsupported type')
    
    try:
        func = sici.cache[x_gpu.dtype]
    except KeyError:
        func = elementwise.ElementwiseKernel(args, op,
                                 options=["-I", install_headers],
                                 preamble='#include "cuSpecialFuncs.h"')
        sici.cache[x_gpu.dtype] = func

    si_gpu = gpuarray.empty_like(x_gpu)
    ci_gpu = gpuarray.empty_like(x_gpu)
    func(x_gpu, si_gpu, ci_gpu)
        
    return (si_gpu, ci_gpu)
sici.cache = {}  # per-dtype kernel cache used above (defined at module level in the original source)
Developer: Lurkman, Project: scikits.cuda, Lines: 57, Source: special.py

Example 5: make_GPU_gradient

def make_GPU_gradient(mesh, context):
    '''Prepare to compute gradient on the GPU w.r.t. the given mesh.
    Return gradient function.
    '''
    mx = int(getattr(mesh, 'nx', 1))
    my = int(getattr(mesh, 'ny', 1))
    mz = int(getattr(mesh, 'nz', 1))

    dxInv = np.array(1./getattr(mesh, 'dx', 1), dtype=np.float64)
    dyInv = np.array(1./getattr(mesh, 'dy', 1), dtype=np.float64)
    dzInv = np.array(1./getattr(mesh, 'dz', 1), dtype=np.float64)

    sizeof_double = 8
    with open(where + 'gradient2.cu') as fdlib:
        source = fdlib.read()
    module = SourceModule(source)

    mx_ptr = module.get_global("mx")[0]
    my_ptr = module.get_global("my")[0]
    mz_ptr = module.get_global("mz")[0]
    cuda.memcpy_htod(mx_ptr, np.array(mx, dtype=np.int32))
    cuda.memcpy_htod(my_ptr, np.array(my, dtype=np.int32))
    cuda.memcpy_htod(mz_ptr, np.array(mz, dtype=np.int32))

    dxInv_ptr = module.get_global("dxInv")[0]
    dyInv_ptr = module.get_global("dyInv")[0]
    dzInv_ptr = module.get_global("dzInv")[0]
    cuda.memcpy_htod(dxInv_ptr, dxInv)
    cuda.memcpy_htod(dyInv_ptr, dyInv)
    cuda.memcpy_htod(dzInv_ptr, dzInv)

    deriv_x = module.get_function("gradient_x")
    deriv_y = module.get_function("gradient_y")
    deriv_z = module.get_function("gradient_z")

    block, grid = mesh.get_domain_decomposition(DeviceData().max_threads)

    d_deriv_x = gpuarray.empty(shape=(1, mesh.n_nodes), dtype=np.float64)
    d_deriv_y = gpuarray.empty_like(d_deriv_x)
    d_deriv_z = gpuarray.empty_like(d_deriv_x)

    def _gradient(scalar_values):
        '''Calculate three-dimensional gradient for GPUArray
        scalar_values.
        '''
        deriv_x(scalar_values, d_deriv_x, block=block, grid=grid)
        deriv_y(scalar_values, d_deriv_y, block=block, grid=grid)
        deriv_z(scalar_values, d_deriv_z, block=block, grid=grid)
        context.synchronize()

        return (d_deriv_x, d_deriv_y, d_deriv_z)[:mesh.dimension]
    return _gradient
Developer: aoeftiger, Project: PyPIC, Lines: 52, Source: gradient.py

Example 6: __init__

 def __init__(self,n_units,n_incoming,N,init_sd=1.0,precision=np.float32,magic_numbers=False):
     
     self.n_units = n_units
     self.n_incoming = n_incoming
     self.N = N
     w = np.random.normal(0,init_sd,(self.n_incoming,self.n_units))
     b = np.random.normal(0,init_sd,(1,n_units))
     
     self.weights = gpuarray.to_gpu(w.copy().astype(precision))
     self.gW = gpuarray.empty_like(self.weights)
     
     # Prior and ID must be set after creation
     self.prior = -1
     self.ID = -1
             
     self.biases = gpuarray.to_gpu(b.copy().astype(precision))
     self.gB = gpuarray.empty_like(self.biases)
         
     #Set up momentum variables for HMC sampler
     self.pW = gpuarray.to_gpu(np.random.normal(0,1,self.gW.shape))
     self.pB = gpuarray.to_gpu(np.random.normal(0,1,self.gB.shape))
     
     self.epsW = gpuarray.zeros(self.weights.shape,precision) + 1.0
     self.epsB = gpuarray.zeros(self.biases.shape,precision) + 1.0        
     
     self.precision = precision
     self.outputs = gpuarray.zeros((self.N,self.n_units),precision)   
     
     self.magic_numbers = magic_numbers
     #Define tan_h function on GPU   
     if magic_numbers:
         self.tanh = ElementwiseKernel(
             "float *x",
             "x[i] = 1.7159 * tanh(2/3*x[i]);",
             "tan_h",preamble="#include <math.h>")
     else:
         self.tanh = ElementwiseKernel(
         "float *x",
         "x[i] = tanh(min(max(-10.0,x[i]),10.0));",
         "tan_h",preamble="#include <math.h>")
     #Compile kernels 
     kernels = SourceModule(open(path+'/kernels.cu', "r").read())        
     self.add_bias_kernel = kernels.get_function("add_bias")
     
     self.rng = curandom.XORWOWRandomNumberGenerator()
     
     ##Initialize posterior weights
     self.posterior_weights = list()
     self.posterior_biases = list()
Developer: beamandrew, Project: BNN, Lines: 49, Source: Layer.py

Example 7: e1z

def e1z(z_gpu):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    z_gpu : GPUArray
        Input matrix of shape `(m, n)`.
        
    Returns
    -------
    e_gpu : GPUArray
        GPUarrays containing the exponential integrals of
        the entries of `z_gpu`.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True

    """

    if z_gpu.dtype == np.complex64:
        use_double = 0
    elif z_gpu.dtype == np.complex128:
        use_double = 1
    else:
        raise ValueError('unsupported type')

    
    # Get block/grid sizes; the number of threads per block is limited
    # to 256 because the e1z kernel defined above uses too many
    # registers to be launched with more threads per block:
    dev = get_current_device()
    max_threads_per_block = 256
    block_dim, grid_dim = select_block_grid_sizes(dev, z_gpu.shape, max_threads_per_block)

    # Set this to False when debugging to make sure the compiled kernel is
    # not cached:
    cache_dir=None
    e1z_mod = \
             SourceModule(e1z_mod_template.substitute(use_double=use_double),
                          cache_dir=cache_dir)
    e1z_func = e1z_mod.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    e1z_func(z_gpu, e_gpu,
              np.uint32(z_gpu.size),
              block=block_dim,
              grid=grid_dim)
    return e_gpu
Developer: sequoiar, Project: scikits.cuda, Lines: 60, Source: special.py

Example 8: test_cublas_bug

def test_cublas_bug():
    '''
    The SGEMM call would cause all calls after it to fail for some unknown
    reason. This is likely caused by swaprows corrupting memory.

    NOTE: this was confirmed by nvidia to be a bug within CUDA, and should be
          fixed in CUDA 6.5
    '''
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    s = slice(128, n)
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    #print handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    stream = Stream()
    stream.synchronize()
Developer: stachon, Project: binet, Lines: 34, Source: test_op.py

Example 9: feed_forward

    def feed_forward(self, input_data, prediction=False):
        """Propagate forward through the layer

        **Parameters:**

        input_data : ``GPUArray``
            Input data to perform dropout on.

        prediction : bool, optional
            Whether to use the prediction model. If true, the data is
            scaled by ``1 - dropout_probability`` instead of applying dropout.

        **Returns:**
        
        dropout_data : ``GPUArray``
            The data after performing dropout.
        """

        assert input_data.shape[1] == self.n_in

        if not prediction:
            dropout_input = gpuarray.empty_like(input_data)
            dropout_mask = sample_dropout_mask(input_data,
                                               self.dropout_probability, target=dropout_input
                                           )
            return dropout_input, dropout_mask
        else:
            return (input_data * (1 - self.dropout_probability),)
Developer: DavidDJChen, Project: hebel, Lines: 28, Source: input_dropout.py

Example 10: initializeGpuMemory

 def initializeGpuMemory(self):
     K = self.modelParams["proc_id_model","K"]
     
     # Sufficient statistics for the parameters of G kernels
     self.gpuPtrs["impulse_model","nnz_Z"] = gpuarray.empty((K,K), dtype=np.int32)
     self.gpuPtrs["impulse_model","g_suff_stats"] = gpuarray.empty((K,K), dtype=np.float32) 
     self.gpuPtrs["impulse_model","GS"] = gpuarray.empty_like(self.base.dSS["dS"])
Developer: richardkwo, Project: pyhawkes, Lines: 7, Source: impulse_models.py

Example 11: computeIrDensity

 def computeIrDensity(self, dS_gpu):
     """
     Compute the impulse response density at the time intervals in dS_gpu 
     """
     K = self.modelParams["proc_id_model","K"]
     N = self.base.data.N
     gS_gpu = gpuarray.empty_like(dS_gpu)
     
     # Update GS using the impulse response parameters
     grid_w = int(np.ceil(N/1024.0))
     self.gpuKernels["computeLogisticNormalGSIndiv"](np.int32(K),
                                                     np.int32(self.base.data.N),
                                                     self.gpuPtrs["proc_id_model","C"].gpudata,
                                                     self.base.dSS["rowIndices"].gpudata,
                                                     self.base.dSS["colPtrs"].gpudata,
                                                     self.gpuPtrs["impulse_model","g_mu"].gpudata,
                                                     self.gpuPtrs["impulse_model","g_tau"].gpudata,
                                                     np.float32(self.params["dt_max"]),
                                                     dS_gpu.gpudata,
                                                     gS_gpu.gpudata,
                                                     block=(1024, 1, 1), 
                                                     grid=(grid_w,1)
                                                     )
     
     return gS_gpu
Developer: richardkwo, Project: pyhawkes, Lines: 25, Source: impulse_models.py

Example 12: feed_forward

    def feed_forward(self, input_data, prediction=False):
        """Propagate forward through the layer

        **Parameters:**

        input_data : ``GPUArray``
            Input data to perform dropout on.

        prediction : bool, optional
            Whether to use the prediction model. If true, the data is
            scaled by ``1 - dropout_probability`` instead of applying dropout.

        **Returns:**
        
        dropout_data : ``GPUArray``
            The data after performing dropout.
        """

        if input_data.shape[1] != self.n_in:
            raise ValueError('Number of outputs from previous layer (%d) '
                             'does not match number of inputs to this layer (%d)' %
                             (input_data.shape[1], self.n_in))

        if not prediction:
            dropout_input = gpuarray.empty_like(input_data)
            dropout_mask = sample_dropout_mask(input_data,
                                               self.dropout_probability, target=dropout_input
                                           )
            return dropout_input, dropout_mask
        else:
            return (input_data * (1 - self.dropout_probability),)
Developer: hani1986ye, Project: hebel, Lines: 31, Source: input_dropout.py

Example 13: e1z

def e1z(z_gpu, dev):
    """
    Exponential integral with `n = 1` of complex arguments.

    Parameters
    ----------
    z_gpu : GPUArray
        Input matrix of shape `(m, n)`.
    dev : pycuda.driver.Device
        Device object to be used.
        
    Returns
    -------
    e_gpu : GPUArray
        GPUarrays containing the exponential integrals of
        the entries of `z_gpu`.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import scipy.special
    >>> import special
    >>> z = np.asarray(np.random.rand(4, 4)+1j*np.random.rand(4, 4), np.complex64)
    >>> z_gpu = gpuarray.to_gpu(z)
    >>> e_gpu = e1z(z_gpu, pycuda.autoinit.device)
    >>> e_sp = scipy.special.exp1(z)
    >>> np.allclose(e_sp, e_gpu.get())
    True

    """

    if z_gpu.dtype == np.complex64:
        use_double = 0
    elif z_gpu.dtype == np.complex128:
        use_double = 1
    else:
        raise ValueError("unsupported type")

    # Get block/grid sizes:
    max_threads_per_block, max_block_dim, max_grid_dim = get_dev_attrs(dev)
    block_dim, grid_dim = select_block_grid_sizes(dev, z_gpu.shape)
    max_blocks_per_grid = max(max_grid_dim)

    # Set this to False when debugging to make sure the compiled kernel is
    # not cached:
    cache_dir = None
    e1z_mod = SourceModule(
        e1z_mod_template.substitute(
            use_double=use_double, max_threads_per_block=max_threads_per_block, max_blocks_per_grid=max_blocks_per_grid
        ),
        cache_dir=cache_dir,
        options=["-I", install_headers],
    )
    e1z_func = e1z_mod.get_function("e1z")

    e_gpu = gpuarray.empty_like(z_gpu)
    e1z_func(z_gpu.gpudata, e_gpu.gpudata, np.uint32(z_gpu.size), block=block_dim, grid=grid_dim)
    return e_gpu
Developer: stefanv, Project: scikits.cuda, Lines: 60, Source: special.py

Example 14: worker

def worker():
    comm = MPI.Comm.Get_parent()
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    import pycuda.driver as drv
    drv.init()

    # Find maximum number of available GPUs:
    max_gpus = drv.Device.count()

    # Use modular arithmetic to avoid assigning a nonexistent GPU:
    n = rank % max_gpus
    dev = drv.Device(n)
    ctx = dev.make_context()
    atexit.register(ctx.pop)

    # Execute a kernel:
    import pycuda.gpuarray as gpuarray
    from pycuda.elementwise import ElementwiseKernel
    
    kernel = ElementwiseKernel('double *y, double *x, double a',
                               'y[i] = a*x[i]')
    x_gpu = gpuarray.to_gpu(np.random.rand(2))
    y_gpu = gpuarray.empty_like(x_gpu)
    kernel(y_gpu, x_gpu, np.double(2.0))

    print 'I am process %d of %d on CPU %s using GPU %s of %s [x_gpu=%s, y_gpu=%s]' % \
        (rank, size, name, n, max_gpus, str(x_gpu.get()), str(y_gpu.get()))
    comm.Disconnect()
Developer: lebedov, Project: cudamps, Lines: 31, Source: demo.py

Example 15: mult_matrix

def mult_matrix(a, b, target=None):
    assert a.shape == b.shape
    if target is None:
        target = gpuarray.empty_like(a)

    all_kernels["mult_matrix"](a, b, target)
    return target
Developer: Khodeir, Project: hebel, Lines: 7, Source: elementwise.py


Note: The pycuda.gpuarray.empty_like function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective authors; the source code copyright belongs to the original authors, and distribution and use should follow each project's license. Do not republish without permission.