

Python gpuarray.empty method code examples

This article collects typical usage examples of the pycuda.gpuarray.empty method in Python. If you are wondering what gpuarray.empty does, how to call it, or what real-world uses look like, the selected examples below should help. You can also explore further usage examples from the containing module, pycuda.gpuarray.


The following shows 13 code examples of the gpuarray.empty method, ordered by popularity by default.
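
Before the collected examples, here is a minimal, self-contained sketch of the pattern they all share: gpuarray.empty allocates device memory without initializing it, so the caller must write to it (with a kernel, a copy, or fill()) before reading it back. The shape, dtype, and fill value below are purely illustrative.

import numpy as np
import pycuda.autoinit  # importing this module initializes a CUDA context
from pycuda import gpuarray

# Allocate an uninitialized 4 x 4 float32 array on the device.
a_gpu = gpuarray.empty((4, 4), dtype=np.float32)

# The contents are undefined until written, so fill the buffer on the GPU ...
a_gpu.fill(np.float32(1.0))

# ... then copy the result back to the host.
print(a_gpu.get())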

Example 1: interpNearest

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def interpNearest(ary, ny, nx):
  """Used to interpolate the mask for each stage."""
  if ary.shape == (ny, nx):
    return ary
  y, x = ary.shape
  rx = x / nx
  ry = y / ny
  out = np.empty((ny, nx), dtype=np.float32)
  for j in range(ny):
    for i in range(nx):
      out[j, i] = ary[int(ry * j + .5), int(rx * i + .5)]
  return out


# =======================================================================#
# =                                                                     =#
# =                        Class CorrelStage:                           =#
# =                                                                     =#
# =======================================================================# 
Author: LaboratoireMecaniqueLille | Project: crappy | Lines: 21 | Source file: gpucorrel.py

Example 2: gpu_mandelbrot

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def gpu_mandelbrot(width, height, real_low, real_high, imag_low, imag_high, max_iters, upper_bound):

    # we set up our complex lattice as such
    real_vals = np.matrix(np.linspace(real_low, real_high, width), dtype=np.complex64)
    imag_vals = np.matrix(np.linspace( imag_high, imag_low, height), dtype=np.complex64) * 1j
    mandelbrot_lattice = np.array(real_vals + imag_vals.transpose(), dtype=np.complex64)    
    
    # copy complex lattice to the GPU
    mandelbrot_lattice_gpu = gpuarray.to_gpu_async(mandelbrot_lattice)

    # synchronize in current context
    pycuda.autoinit.context.synchronize()

    # allocate an empty array on the GPU
    mandelbrot_graph_gpu = gpuarray.empty(shape=mandelbrot_lattice.shape, dtype=np.float32)

    mandel_ker( mandelbrot_lattice_gpu, mandelbrot_graph_gpu, np.int32(max_iters), np.float32(upper_bound))

    pycuda.autoinit.context.synchronize()
              
    mandelbrot_graph = mandelbrot_graph_gpu.get_async()
    
    pycuda.autoinit.context.synchronize()

    return mandelbrot_graph 
Author: PacktPublishing | Project: Hands-On-GPU-Programming-with-Python-and-CUDA | Lines: 27 | Source file: gpu_mandelbrot_context_sync.py

Example 3: gpu_mandelbrot

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def gpu_mandelbrot(width, height, real_low, real_high, imag_low, imag_high, max_iters, upper_bound):

    # we set up our complex lattice as such
    real_vals = np.matrix(np.linspace(real_low, real_high, width), dtype=np.complex64)
    imag_vals = np.matrix(np.linspace( imag_high, imag_low, height), dtype=np.complex64) * 1j
    mandelbrot_lattice = np.array(real_vals + imag_vals.transpose(), dtype=np.complex64)    
    
    # copy complex lattice to the GPU
    mandelbrot_lattice_gpu = gpuarray.to_gpu(mandelbrot_lattice)

    # allocate an empty array on the GPU
    mandelbrot_graph_gpu = gpuarray.empty(shape=mandelbrot_lattice.shape, dtype=np.float32)

    mandel_ker( mandelbrot_lattice_gpu, mandelbrot_graph_gpu, np.int32(max_iters), np.float32(upper_bound))
              
    mandelbrot_graph = mandelbrot_graph_gpu.get()
    
    return mandelbrot_graph 
Author: PacktPublishing | Project: Hands-On-GPU-Programming-with-Python-and-CUDA | Lines: 20 | Source file: gpu_mandelbrot0.py
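
Both Mandelbrot examples call mandel_ker, which is defined elsewhere in the same repository and not shown in these excerpts. As a sketch, such a kernel can be written with pycuda.elementwise.ElementwiseKernel roughly as follows (the exact kernel body in the book may differ):

from pycuda.elementwise import ElementwiseKernel

mandel_ker = ElementwiseKernel(
    "pycuda::complex<float> *lattice, float *mandelbrot_graph, int max_iters, float upper_bound",
    """
    mandelbrot_graph[i] = 1;
    pycuda::complex<float> c = lattice[i];
    pycuda::complex<float> z(0, 0);
    for (int j = 0; j < max_iters; j++) {
        z = z * z + c;
        if (abs(z) > upper_bound) {
            mandelbrot_graph[i] = 0;  // point escaped: not in the Mandelbrot set
            break;
        }
    }
    """,
    "mandel_ker")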

Example 4: scikit_gpu_fft_pipeline

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def scikit_gpu_fft_pipeline(filename):
    data = []
    start = timer()
    with open(filename, 'r') as file_obj:
        for _ in range(((32768*1024*SIZE_MULTIPLIER//GULP_SIZE)//COMPLEX_MULTIPLIER)//GULP_FRAME_FFT):
            data = np.fromfile(file_obj, dtype=np.complex64, count=GULP_SIZE*GULP_FRAME_FFT).reshape((GULP_FRAME_FFT, GULP_SIZE))
            g_data = gpuarray.to_gpu(data)
            plan = Plan(data.shape[1], np.complex64, np.complex64, batch=GULP_FRAME_FFT)
            plan_inverse = Plan(data.shape[1], np.complex64, np.complex64, batch=GULP_FRAME_FFT)
            tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
            tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
            fft(g_data, tmp1, plan)
            ifft(tmp1, tmp2, plan_inverse)
            for _ in range(NUMBER_FFT-1):
                # Can't do FFT in place for fairness (emulating full pipeline)
                tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
                fft(tmp2, tmp1, plan)
                tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
                ifft(tmp1, tmp2, plan_inverse)
    end = timer()
    return end-start 
Author: ledatelescope | Project: bifrost | Lines: 23 | Source file: skcuda_fft_pipeline.py
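
This benchmark depends on names defined elsewhere in the script: the GULP_*, SIZE_MULTIPLIER, COMPLEX_MULTIPLIER, and NUMBER_FFT constants, a timer, and the FFT plan and transform functions from scikit-cuda. A plausible set of imports, assumed here for readability (the actual script may differ), is:

from timeit import default_timer as timer
import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from skcuda.fft import Plan, fft, ifft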

Example 5: call_prepare

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def call_prepare(self, sz, allocator):
    MAX_BLOCK_COUNT = 1024
    SMALL_SEQ_COUNT = 4        

    if sz <= self.block_size*SMALL_SEQ_COUNT*MAX_BLOCK_COUNT:
        total_block_size = SMALL_SEQ_COUNT*self.block_size
        block_count = (sz + total_block_size - 1) // total_block_size
        seq_count = SMALL_SEQ_COUNT
    else:
        block_count = MAX_BLOCK_COUNT
        macroblock_size = block_count*self.block_size
        seq_count = (sz + macroblock_size - 1) // macroblock_size

    if block_count == 1:
        result = empty((), self.dtype_out, allocator)
    else:
        result = empty((block_count,), self.dtype_out, allocator)

    grid_size = (block_count, 1)
    block_size =  (self.block_size, 1, 1)

    return result, block_count, seq_count, grid_size, block_size 
Author: gwastro | Project: pycbc | Lines: 24 | Source file: array_cuda.py
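
To illustrate the sizing logic with a hypothetical self.block_size of 256 (the value is not part of the snippet): the small-input threshold is 256 * 4 * 1024 = 1,048,576 elements. For sz = 1,048,576 the first branch applies, giving total_block_size = 1,024, block_count = 1,024, and seq_count = 4; for sz = 10,000,000 the second branch applies, giving block_count = 1,024, macroblock_size = 262,144, and seq_count = 39.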

Example 6: resampleD

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def resampleD(self, newY, newX):
    """Resamples tex_d and returns it in a gpuarray"""
    if (self.rX, self.rY) != (np.int32(newX), np.int32(newY)):
      self.rGrid = (int(ceil(newX / 32)), int(ceil(newY / 32)))
      self.rBlock = (int(ceil(newX / self.rGrid[0])),
                     int(ceil(newY / self.rGrid[1])), 1)
      self.rX, self.rY = np.int32(newX), np.int32(newY)
      self.devROut = gpuarray.empty((newY, newX), np.float32)
    self.debug(3, "Resampling img_d texture to", (newY, newX),
               " grid:", self.rGrid, "block:", self.rBlock)
    self._resampleKrnl.prepared_call(self.rGrid, self.rBlock,
                                     self.devROut.gpudata,
                                     self.rX, self.rY)
    return self.devROut 
Author: LaboratoireMecaniqueLille | Project: crappy | Lines: 16 | Source file: gpucorrel.py

Example 7: getFields

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def getFields(self, y=None, x=None):
    """Returns the fields, reampled to size (y,x)"""
    if x is None or y is None:
      y = self.h[0]
      x = self.w[0]
    outX = gpuarray.empty((self.Nfields, y, x), np.float32)
    outY = gpuarray.empty((self.Nfields, y, x), np.float32)
    grid = (int(ceil(x / 32)), int(ceil(y / 32)))
    block = (int(ceil(x / grid[0])), int(ceil(y / grid[1])), 1)
    for i in range(self.Nfields):
      self.resampleF[i].prepared_call(grid, block,
                                      outX[i, :, :].gpudata,
                                      outY[i, :, :].gpudata,
                                      np.int32(x), np.int32(y))
    return outX, outY 
Author: LaboratoireMecaniqueLille | Project: crappy | Lines: 17 | Source file: gpucorrel.py

Example 8: eval_

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def eval_(self, x, y=None, batch_size=None, stream=None, delta=None, w_t = None, b_t = None):
    
        if stream is None:
            stream = self.stream
        
        if type(x) != pycuda.gpuarray.GPUArray:
            x = gpuarray.to_gpu_async(np.array(x,dtype=np.float32) , stream=self.stream)
            
        if batch_size is None:
            if len(x.shape) == 2:
                batch_size = np.int32(x.shape[0])
            else:
                batch_size = np.int32(1)
                
        if delta is None:
            delta = self.delta
            
        delta = np.float32(delta)
            
        if w_t is None:
            w_t = np.int32(-1)
            
        if b_t is None:
            b_t = np.int32(-1)
        
        
        if y is None:
            if batch_size == 1:
                y = gpuarray.empty((self.num_outputs,), dtype=np.float32)
            else:
                y = gpuarray.empty((batch_size, self.num_outputs), dtype=np.float32)


        eval_ker(self.num_outputs, self.num_inputs, self.relu, self.sigmoid, \
                 self.weights, self.b, x, y, np.int32(batch_size), w_t, b_t, \
                 delta , block=self.block, grid=self.grid , stream=stream)
        
        return y


# threads: at least "num" 
Author: PacktPublishing | Project: Hands-On-GPU-Programming-with-Python-and-CUDA | Lines: 43 | Source file: deep_neural_network.py

Example 9: definite_integral

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def definite_integral(self, lo=None, hi=None, samples_per_thread=None, num_blocks=None):
        
        if lo is None or hi is None:
            lo = self.lo
            hi = self.hi
            
        if samples_per_thread is None:
            samples_per_thread = self.samples_per_thread
            
        if num_blocks is None:
            num_blocks = self.num_blocks
            grid = (num_blocks,1,1)
        else:
            grid = (num_blocks,1,1)
            
        block = (32,1,1)
        
        num_threads = 32*num_blocks
        
        self.ys = gpuarray.empty((num_threads,) , dtype=self.numpy_precision)
        
        self.f(np.int32(samples_per_thread), self.numpy_precision(lo), self.numpy_precision(hi), self.ys, block=block, grid=grid)
        
        self.nintegral = np.sum(self.ys.get() )
        
        return np.sum(self.nintegral) 
Author: PacktPublishing | Project: Hands-On-GPU-Programming-with-Python-and-CUDA | Lines: 28 | Source file: monte_carlo_integrator.py

Example 10: device_buffer

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def device_buffer(self):
        """Return the device buffer.

        Returns
        -------
        pycuda.gpuarray.GPUArray
            The pycuda array taking the data.

        """
        if self._device_buf is None:
            self._device_buf = gpuarray.empty(self._shape, self._dtype)
        return self._device_buf 
Author: seetaresearch | Project: dragon | Lines: 14 | Source file: engine.py

Example 11: __init__

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def __init__(self, transformer, op):
        super(LUTBpropKernel, self).__init__(transformer)
        self.op = op

        # Hard coding for now, non-deterministic is faster but difficult to reproduce
        # or debug. Deterministic kernels are fast enough and LUT layer tends to be
        # a small percentage of execution time.
        self.deterministic = True

        (self.E, self.I) = (_ for _ in op.call_info())
        self.O = op.tensor_description()
        pad_idx = op.pad_idx
        lut_axis = op.lut_axis
        # Only supported when reads are contiguous
        assert (lut_axis == 0)

        embedding_dim = self.O.shape[1]
        vocab_size = self.O.shape[0]
        nin = self.E.shape[0]

        if pad_idx is None:
            pad_idx = int(-1)

        self.kernels = []

        if self.deterministic:
            self.index_buffer = empty((nin,), dtype=np.int32)
            self.offset_buffer = empty((nin,), dtype=np.int32)
            self.word_counts = empty((max(512, vocab_size) + 512,), dtype=np.int32)

            for kernel_id in range(5):
                threads = 512
                if kernel_id in [1, 3]:
                    blocks = vocab_size // (threads * 2)
                    if vocab_size % (threads * 2):
                        blocks = blocks + 1
                elif kernel_id == 2:
                    blocks = 1
                else:
                    blocks = nin // threads
                    if nin % threads:
                        blocks = blocks + 1

                params = [(blocks, 1, 1), (threads, 1, 1), None,
                          self.I, self.index_buffer.gpudata, self.offset_buffer.gpudata,
                          self.word_counts.gpudata, max(512, vocab_size), nin]
                kernel = lookuptable._get_sorting_kernel(kernel_id, threads, self.I.dtype)
                self.kernels.append((kernel, params))

            threads = 32
            blocks = nin

            params = [(blocks, 1, 1), (threads, 1, 1), None,
                      self.I, self.index_buffer.gpudata, self.O, self.E,
                      nin, embedding_dim, vocab_size, pad_idx]

            kernel = lookuptable._get_lut_bprop_kernel(self.E.dtype, self.I.dtype, True)
            self.kernels.append((kernel, params)) 
Author: NervanaSystems | Project: ngraph-python | Lines: 60 | Source file: lut.py

Example 12: __init__

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def __init__(self, bases, pv=None, *, force=False):
        """Create a new density matrix for several qudits.

        Parameters
        ----------
        bases : list of quantumsim.bases.PauliBasis
            Dimensions of qubits in the system.

        pv : array or None.
            Must be of size (2**no_qubits, 2**no_qubits). Only upper triangle
            is relevant.  If data is `None`, create a new density matrix with
            all qubits in ground state.
        """
        super().__init__(bases, pv, force=force)
        if pv is not None:
            if self.dim_pauli != pv.shape:
                raise ValueError(
                    '`bases` Pauli dimensionality should be the same as the '
                    'shape of `data` array.\n'
                    ' - bases shapes: {}\n - data shape: {}'
                    .format(self.dim_pauli, pv.shape))
        else:
            pv = np.zeros(self.dim_pauli, np.float64)
            ground_state_index = [pb.computational_basis_indices[0]
                                  for pb in self.bases]
            pv[tuple(ground_state_index)] = 1

        if isinstance(pv, np.ndarray):
            if pv.dtype not in (np.float16, np.float32, np.float64):
                raise ValueError(
                    '`pv` must have float64 data type, got {}'
                    .format(pv.dtype)
                )

            # Looks like there are some issues with ordering, so the line
            # below per se does not work.
            # self._data = ga.to_gpu(pv.astype(np.float64))

            self._work_data = ga.to_gpu(
                pv.reshape(pv.size, order='C').astype(np.float64))
            self._data = ga.empty(pv.shape, dtype=np.float64, order='C')
            self._data.set(self._work_data.reshape(pv.shape))
            self._work_data.gpudata.free()
        elif isinstance(pv, ga.GPUArray):
            if pv.dtype != np.float64:
                raise ValueError(
                    '`pv` must have float64 data type, got {}'
                    .format(pv.dtype)
                )
            self._data = pv
        else:
            raise ValueError(
                "`pv` must be Numpy array, PyCUDA GPU array or "
                "None, got type `{}`".format(type(pv)))

        self._data.gpudata.size = self._data.nbytes
        self._work_data = ga.empty_like(self._data)
        self._work_data.gpudata.size = self._work_data.nbytes 
Author: quantumsim | Project: quantumsim | Lines: 60 | Source file: cuda.py

Example 13: add_layer

# Required import: from pycuda import gpuarray [as alias]
# Or: from pycuda.gpuarray import empty [as alias]
def add_layer(self, layer):
    
        if layer['type'] == 'dense':
            if len(self.network) == 0:
                num_inputs = layer['num_inputs']
            else:
                num_inputs = self.network_summary[-1][2]
            
            num_outputs = layer['num_outputs']
            sigmoid = layer['sigmoid']
            relu = layer['relu']
            
            weights = layer['weights']
            
            b = layer['bias']
            
            self.network.append(DenseLayer(num_inputs=num_inputs, num_outputs=num_outputs, sigmoid=sigmoid, relu=relu, weights=weights, b=b))
            self.network_summary.append( ('dense', num_inputs, num_outputs))
            
            if self.max_batch_size > 1:
                if len(self.network_mem) == 0:
                    self.network_mem.append(gpuarray.empty( (self.max_batch_size, self.network_summary[-1][1] ), dtype=np.float32 ) )
                self.network_mem.append(gpuarray.empty((self.max_batch_size, self.network_summary[-1][2] ), dtype=np.float32  ) ) 
            else:
                if len(self.network_mem) == 0:
                    self.network_mem.append( gpuarray.empty( (self.network_summary[-1][1], ), dtype=np.float32 ) )
                self.network_mem.append( gpuarray.empty((self.network_summary[-1][2], ), dtype=np.float32  ) ) 
    
        elif layer['type'] == 'softmax':
            
            if len(self.network) == 0:
                raise Exception("Error!  Softmax layer can't be first!")
            
            if self.network_summary[-1][0] != 'dense':
                raise Exception("Error!  Need a dense layer before a softmax layer!")
            
            
            num = self.network_summary[-1][2]
            
            self.network.append(SoftmaxLayer(num=num))
            
            self.network_summary.append(('softmax', num, num))
            
            if self.max_batch_size > 1:
                self.network_mem.append(gpuarray.empty((self.max_batch_size, self.network_summary[-1][2] ), dtype=np.float32  ) ) 
            else:
                self.network_mem.append( gpuarray.empty((self.network_summary[-1][2], ), dtype=np.float32  ) ) 
Author: PacktPublishing | Project: Hands-On-GPU-Programming-with-Python-and-CUDA | Lines: 49 | Source file: deep_neural_network.py


Note: The pycuda.gpuarray.empty examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective authors, who retain copyright; please consult each project's license before distributing or using the code. Do not reproduce without permission.