当前位置: 首页>>代码示例>>Python>>正文


Python pyopencl.enqueue_copy函数代码示例

本文整理汇总了Python中pyopencl.enqueue_copy函数的典型用法代码示例。如果您正苦于以下问题:Python enqueue_copy函数的具体用法?Python enqueue_copy怎么用?Python enqueue_copy使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了enqueue_copy函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fromGpu

    def fromGpu(self, gpu_buf, target_shape, target_dtype):
        """Read a device buffer back into a newly allocated host array.

        Args:
            gpu_buf: Source object handed to ``cl.enqueue_copy``.
            target_shape: Shape of the host array to allocate.
            target_dtype: Element type of the host array to allocate.

        Returns:
            The ``numpy`` array that received the copied data.
        """
        import pyopencl as cl

        host_array = numpy.empty(target_shape, target_dtype)
        # Blocking copy: the array is fully populated before it is returned.
        cl.enqueue_copy(self._createQueue(), host_array, gpu_buf,
                        is_blocking=True)
        return host_array
开发者ID:ewaybotYan,项目名称:pyfft,代码行数:7,代码来源:helpers.py

示例2: _set

    def _set(self, ary):
        """Upload *ary* to ``self.data`` through a zero-padded staging array.

        The staging array has shape ``self.datashape``; *ary* fills the first
        ``self.ioshape[-1]`` entries of its last axis and the rest stays zero.
        """
        staging = np.zeros(self.datashape, dtype=self.dtype)
        used_cols = self.ioshape[-1]
        staging[..., :used_cols] = ary

        # Hand the padded array to the backend's default queue.
        cl.enqueue_copy(self.backend.qdflt, self.data, staging)
开发者ID:Aerojspark,项目名称:PyFR,代码行数:7,代码来源:types.py

示例3: run

    def run(self, kernel, shape, *args):
        """Run *kernel* over the given arguments and return the host result.

        Every ``np.ndarray`` in *args* is backed by a device buffer cached in
        ``self.buffers`` under ``id(array)``: a cached buffer is refreshed
        with the array's current contents, otherwise a new buffer is created
        from the array.  All other arguments are passed to the kernel as
        ``np.float32`` scalars.  The output buffer is appended as the last
        kernel argument.

        Args:
            kernel: Callable invoked as
                ``kernel(queue, global_size, None, *kernel_args)``.
            shape: Global work size; when falsy, the shape of the first
                ndarray in *args* is used instead.
            *args: Kernel arguments (arrays and scalars).

        Returns:
            ``self.output`` after the device result has been copied into it.

        Raises:
            IndexError: If *shape* is falsy and *args* contains no ndarray.
        """
        queue = self.runtime.queues[0]
        kargs = []

        for arg in args:
            if isinstance(arg, np.ndarray):
                if id(arg) in self.buffers:
                    # Known array: refresh the cached device buffer.
                    buf = self.buffers[id(arg)]
                    cl.enqueue_copy(queue, buf, arg)
                else:
                    flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
                    buf = cl.Buffer(self.runtime.context, flags, arg.nbytes, hostbuf=arg)
                    self.buffers[id(arg)] = buf

                kargs.append(buf)
            else:
                kargs.append(np.float32(arg))

        # TODO: use user-supplied information if necessary
        if shape:
            workspace = shape
        else:
            # Only look for an array when it is actually needed, so that an
            # explicit shape works even if every argument is a scalar.
            arrays = [a for a in args if isinstance(a, np.ndarray)]
            if not arrays:
                raise IndexError(
                    "run() needs an explicit shape or at least one ndarray argument")
            workspace = arrays[0].shape

        if self.output is None:
            # Allocate directly as float32 instead of float64 + astype copy.
            self.output = np.empty(workspace, dtype=np.float32)
            out_buffer = cl.Buffer(self.runtime.context, cl.mem_flags.WRITE_ONLY, self.output.nbytes)
            self.buffers[id(self.output)] = out_buffer
        else:
            out_buffer = self.buffers[id(self.output)]

        kargs.append(out_buffer)

        # self.time covers the kernel launch plus the copy back to the host.
        start = time.time()
        kernel(queue, workspace, None, *kargs)
        cl.enqueue_copy(queue, self.output, out_buffer)
        self.time = time.time() - start
        return self.output
开发者ID:ufo-kit,项目名称:pina,代码行数:35,代码来源:pycl.py

示例4: calc_range

def calc_range(start, num, perexec):
	"""Calculate the otp-md5 of the 64-bit numbers in [start, start + num).

	The range is processed in batches of *perexec* values.  Each batch is
	run through the ``get_otpmd5_64k_rounds`` kernel built from
	``otpmd5.cl`` and the inputs and results are passed to ``send_output``.

	*num* must be a multiple of *perexec*.
	"""

	assert(num % perexec == 0)

	# Boilerplate OpenCL stuff
	ctx = cl.create_some_context()
	queue = cl.CommandQueue(ctx)
	mf = cl.mem_flags

	# Read the program source and compile; the with-block closes the file
	# even if reading fails.
	with open("otpmd5.cl") as source_file:
		sourcecode = source_file.read()
	prg = cl.Program(ctx, sourcecode).build()

	# Floor division keeps the batch count an integer on every Python version.
	for i in xrange(num // perexec):
		offset = start + (perexec * i)

		host_input = numpy.arange(offset, offset+perexec, dtype=numpy.uint64)
		result = numpy.empty_like(host_input)
		dev_input = cl.Buffer(ctx, mf.READ_ONLY | mf.USE_HOST_PTR, hostbuf=host_input)
		dev_output = cl.Buffer(ctx, mf.READ_WRITE, size=result.size * result.itemsize)
		# Wait for the kernel, then for the read-back, before publishing.
		prg.get_otpmd5_64k_rounds(queue, host_input.shape, None, dev_input, dev_output).wait()
		cl.enqueue_copy(queue, result, dev_output).wait()
		send_output(host_input, result)
开发者ID:therealmik,项目名称:otpbreak,代码行数:25,代码来源:create_otp_blocks.py

示例5: generate

 def generate(self, chunk_array, ctx, queue, heightmap_kernel):
     """Allocate chunks in *chunk_array* and fill them via *heightmap_kernel*.

     Steps, in order:
       1. Allocate layer 0 and an 8x8 grid of level-0 chunks.
       2. Turn the height map from ``self._generate_hmap()`` into a 256x256
          int32 array of values clamped to the range 0..32.
       3. Upload the chunk buffers, run the kernel with the integer height
          map as its second buffer argument, copy the level-0 voxel buffer
          from its device side to its host side, and upload the buffers again.

     Args:
         chunk_array: Must be a ``ChunkArray`` (checked with ``assert``).
         ctx: OpenCL context used to create the height-map buffer.
         queue: Command queue used for the kernel launch and the copy.
         heightmap_kernel: Callable invoked as
             ``heightmap_kernel(queue, global_size, None, buf, buf)``.
     """
     assert isinstance(chunk_array, ChunkArray)
     hmap = self._generate_hmap()
     x_bounds = (0, 8)
     y_bounds = (0, 8)
     # Only a single layer (z == 0) is allocated.
     for z in range(1):
         chunk_array.allocate_layer(z, x_bounds, y_bounds)
         for x in range(8):
             for y in range(8):
                 chunk_array.allocate_chunk(x, y, z, level=0)
     print("allocated!")
     # Integer height per (x, y): height*7.4 + 8, clamped to [0, 32].
     ihmap = numpy.empty((256,256), dtype=numpy.int32)
     for x in range(256):
         for y in range(256):
             height = hmap[x, y]
             ihmap[x, y] = int(max(min(height*7.4 + 8, 32), 0))
     # The string literal below is disabled code kept as a no-op expression;
     # it appears to be an earlier CPU path that set voxel flags one by one.
     """for x in range(256):
         print(x)
         for y in range(256):
             height = hmap[x, y]
             z_max = int(max(min(height + 8, 32), 0))
             for z in range(32):
                 voxel = chunk_array.get_voxel(x, y, z)
                 voxel['flags'] = 0 if z_max < z else 1"""
     chunk_array.upload_buffers()
     # Read-only device buffer initialised from ihmap at creation time.
     # NOTE(review): the local name `buffer` shadows a builtin on Python 2.
     buffer = pyopencl.Buffer(ctx, pyopencl.mem_flags.READ_ONLY|pyopencl.mem_flags.COPY_HOST_PTR, hostbuf = ihmap)
     # Disabled explicit upload; COPY_HOST_PTR above already copies ihmap.
     #pyopencl.enqueue_copy(queue, buffer, hmap)
     # NOTE(review): global size is (255, 255, 32) although ihmap is 256x256
     # -- confirm whether the last row/column is skipped on purpose.
     heightmap_kernel(queue, (255, 255, 32), None, chunk_array.array_buffer._d_buffer, buffer)
     # Bring the level-0 voxel data written by the kernel back to the host.
     pyopencl.enqueue_copy(queue, chunk_array.voxel_data.level_buffers[0]._h_buffer, chunk_array.voxel_data.level_buffers[0]._d_buffer)
     chunk_array.upload_buffers()
开发者ID:EyeOfPython,项目名称:Beyond-Infinity,代码行数:30,代码来源:worldgenerator.py

示例6: final

def final(config, ctx, queue, program, buffers, debug=False):
    """Run the ``reconstruct`` kernel and stack the complete solution.

    Each partition contributes ``2 * offdiagonalSize`` outer rows (read back
    from ``buffers[3]``) and ``partitionSize - 2 * offdiagonalSize`` inner
    rows (written by the kernel).  The result interleaves them per partition
    as top rows, inner rows, bottom rows.

    Args:
        config: Mapping providing ``partitionNumber``, ``partitionSize``,
            ``offdiagonalSize`` and ``rhsSize``.
        ctx: OpenCL context used to allocate the output buffers.
        queue: Command queue used for the kernel and the read-backs.
        program: Built program exposing the ``reconstruct`` kernel.
        buffers: Device buffers from earlier stages; indices 1 and 3 are used.
        debug: When true, print the intermediate matrices.

    Returns:
        The sparse matrix produced by ``sp.sparse.vstack`` over all pieces.
    """
    partitionNumber = config['partitionNumber']
    partitionSize = config['partitionSize']
    offdiagonalSize = config['offdiagonalSize']
    rhsSize = config['rhsSize']

    # Row counts per partition: inner block and top+bottom (outer) block.
    innerSize = partitionSize - 2 * offdiagonalSize
    outerSize = 2 * offdiagonalSize

    xo  = np.ones((partitionNumber * innerSize, rhsSize), dtype=np.float32)
    tmp = np.ones((partitionNumber * innerSize, rhsSize), dtype=np.float32)

    mf = cl.mem_flags
    xo_buf  = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=xo)
    tmp_buf = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)

    kernel = program.reconstruct
    kernel.set_scalar_arg_dtypes([None, None, None, None, np.int32, np.int32, np.int32])

    cl.enqueue_barrier(queue)

    kernel(
        queue,
        (partitionNumber,),
        None,
        buffers[1], # Avwg buffer from factor; check it is readable and still valid
        buffers[3], # x buffer from solve; check it is still valid
        xo_buf,
        tmp_buf,
        np.int32(partitionSize),
        np.int32(offdiagonalSize),
        np.int32(rhsSize)
    )

    xtb = np.ones((partitionNumber * outerSize, rhsSize), dtype=np.float32)
    cl.enqueue_copy(queue, xtb, buffers[3])

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if debug:
        print("X(t,b):")
        print(xtb)

    cl.enqueue_copy(queue, xo, xo_buf)

    if debug:
        print("X':")
        print(xo)

    xtb = sparse.csr_matrix(xtb)
    xo = sparse.csr_matrix(xo)

    x = []
    for i in range(partitionNumber):
        t = i * outerSize   # first outer row of partition i
        b = t + outerSize   # one past its last outer row
        u = i * innerSize   # first inner row of partition i
        v = u + innerSize   # one past its last inner row
        x.append(xtb[t : t + offdiagonalSize, 0 : rhsSize])
        x.append(xo[u : v, 0 : rhsSize])
        x.append(xtb[b - offdiagonalSize : b, 0 : rhsSize])

    return sp.sparse.vstack(x)
开发者ID:white-gecko,项目名称:PySPIKE,代码行数:60,代码来源:solve.py

示例7: lnlikelihood_ocl

 def lnlikelihood_ocl(self, pv):
     """Evaluate the log likelihood for *pv* on the device.

     ``self._lnl2d(pv)`` is called first; the ``lnl1d_chunked`` kernel is
     then launched over ``(rows of self.lnl2d, self.cl_lnl_chunks)``, its
     output is copied into ``self.lnl1d``, and the row-wise sum of that
     array (converted to double precision) is returned.
     """
     self._lnl2d(pv)

     global_size = [self.lnl2d.shape[0], self.cl_lnl_chunks]
     n_columns = uint32(self.lnl2d.shape[1])
     self.prg_lnl.lnl1d_chunked(self.cl_queue, global_size, None,
                                n_columns, self._b_lnl2d, self._b_lnl1d)

     cl.enqueue_copy(self.cl_queue, self.lnl1d, self._b_lnl1d)
     return self.lnl1d.astype('d').sum(1)
开发者ID:hpparvi,项目名称:PyTransit,代码行数:7,代码来源:ocllpf.py

示例8: run

 def run(self):
     """Render random samples cell by cell over a 10x10 grid, then save.

     For every grid cell, ``self.nsample`` random points are drawn, their
     first two coordinates are shifted into the cell and scaled by 0.1, and
     the points are uploaded to the GL-shared objects together with an
     intensity array.  The ``Solve`` kernel is run, the GL objects are
     released, the queue is finished and ``self.draw()`` is called.  After
     all cells, the alpha channel is read back, its maximum is printed and
     the vertically flipped image is written to ``render.png``.
     """
     for ii in range(10):
         for jj in range(10):
             samples = np.random.random([self.nsample, 3])
             samples[:, 0] = (samples[:, 0] + ii) * 0.1
             samples[:, 1] = (samples[:, 1] + jj) * 0.1

             # Positions: xyz from the samples, fourth component set to 1.
             self.X = np.zeros((self.nsample, 4), dtype=np.float32)
             self.X[:, :3] = samples
             self.X[:, 3] = 1.

             # Intensities: first three components 1, fourth left at 0.
             self.I = np.zeros((self.nsample, 4), dtype=np.float32)
             self.I[:, :3] = 1.

             shared = [self.X_cl, self.I_cl]
             cl.enqueue_acquire_gl_objects(self.queue, shared)
             cl.enqueue_copy(self.queue, self.X_cl, self.X)
             cl.enqueue_copy(self.queue, self.I_cl, self.I)
             self.program.Solve(self.queue, (self.nsample, self.na), None,
                                self.A_cl, self.X_cl, self.I_cl, self.alpha)
             cl.enqueue_release_gl_objects(self.queue, shared)
             self.queue.finish()

             self.draw()

     self.scrnData = np.zeros((self.width, self.height), dtype=np.float32)
     glReadPixels(0, 0, self.width, self.height, GL_ALPHA, GL_FLOAT, self.scrnData)
     print(np.max(self.scrnData))
     scipy.misc.imsave('render.png', np.flipud(self.scrnData))
开发者ID:temik42,项目名称:render,代码行数:28,代码来源:render.py

示例9: execute

	def execute(self):
		kernel = self.program.fact
		self.event = kernel(self.queue,[self.a_dim],None,self.d_a_buf,self.d_c_buf)
		self.event.wait()
		cl.enqueue_copy(self.queue, self.h_c, self.d_c_buf)
		print "a", self.h_a
		print "ris", self.h_c
开发者ID:fean9r,项目名称:FeaCL,代码行数:7,代码来源:ricorsione+parallela.py

示例10: get_edges

def get_edges(clctx, features, reductions, blurs, buf_in, summarise=True):
    """
    Using the *features* and *reductions* programs, and the *blurs* program
    (sigma=2.0 according to the original note), find the edge pixels in
    *buf_in*.

    Pipeline: *buf_in* is blurred once (x then y) and the result blurred a
    second time; ``features.subtract`` combines the two blurred images,
    ``features.edges`` writes the edge image, and ``reduction_sum`` is run
    over that edge image.

    Returns the reduction result when *summarise* is true, otherwise a
    ``(gs, gs, 4)`` float32 array read back from the edge image.

    The three scratch images are always released, including when a kernel,
    the reduction or the read-back raises.
    """
    gs, wgs = clctx.gs, clctx.wgs
    global_size, local_size = (gs, gs), (wgs, wgs)

    scratch = []
    try:
        for _ in range(3):
            scratch.append(cl.Image(clctx.ctx, cl.mem_flags.READ_WRITE,
                                    clctx.ifmt, global_size))
        bufa, bufb, bufc = scratch

        # First blur pass: buf_in -> bufa (bufb is the intermediate).
        blurs.convolve_x(clctx.queue, global_size, local_size, buf_in, bufb)
        blurs.convolve_y(clctx.queue, global_size, local_size, bufb, bufa)
        # Second blur pass: bufa -> bufb (bufc is the intermediate).
        blurs.convolve_x(clctx.queue, global_size, local_size, bufa, bufc)
        blurs.convolve_y(clctx.queue, global_size, local_size, bufc, bufb)

        features.subtract(clctx.queue, global_size, local_size, bufb, bufa, bufc)
        features.edges(clctx.queue, global_size, local_size, bufc, bufa)
        counts = reduction.run_reduction(clctx, reductions.reduction_sum, bufa)

        if summarise:
            return counts

        edges = np.empty((gs, gs, 4), np.float32)
        cl.enqueue_copy(clctx.queue, edges, bufa,
                        origin=(0, 0), region=(gs, gs))
        return edges
    finally:
        # Release whatever was allocated, even on an early failure.
        for image in scratch:
            image.release()
开发者ID:adamgreig,项目名称:iib,代码行数:32,代码来源:features.py

示例11: __init__

    def __init__(self):
        """Build the OpenCL program from ``ex.cl``, run ``proc`` and print.

        A float32 ramp of 100,000,000 values is uploaded as the read-only
        input, the ``proc`` kernel writes one result per input element, and
        the results are copied back and printed together with their maximum.
        The context, queue, memory flags, buffers and program are kept on
        the instance.
        """
        t_np = np.arange(0, 100000000, dtype=np.float32)

        self.ctx = cl.create_some_context()
        self.queue = cl.CommandQueue(self.ctx)

        self.mf = cl.mem_flags
        self.t_g = cl.Buffer(
            self.ctx,
            self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
            hostbuf=t_np)

        # The with-block closes the file even if reading raises.
        with open("ex.cl", "r") as f:
            fstr = f.read()
        self.prg = cl.Program(self.ctx, fstr).build()

        self.res_g = cl.Buffer(self.ctx, self.mf.WRITE_ONLY, t_np.nbytes)
        self.prg.proc(self.queue, t_np.shape, None, self.t_g, self.res_g)

        res_np = np.empty_like(t_np)
        cl.enqueue_copy(self.queue, res_np, self.res_g)

        # Check on CPU with Numpy:
        print(res_np)
        print(np.amax(res_np))
开发者ID:ignatenkobrain,项目名称:killer-waves,代码行数:26,代码来源:killer.py

示例12: to_host

def to_host(queue, data, dtype, start, shape, elemstrides):
    """Copy a strided 2-D region off the device into a Numpy array.

    Args:
        queue: Command queue used for the blocking read.
        data: Device buffer to read from.
        dtype: Numpy dtype object of the elements (its ``itemsize`` is used).
        start: Offset of the first element, in elements.
        shape: ``(m, n)`` shape of the region.
        elemstrides: ``(Sm, Sn)`` strides of the region, in elements.

    Returns:
        An ``(m, n)`` array of *dtype*.  For an empty region this is a fresh
        zero array; otherwise it is a strided view over the bytes read back.

    Raises:
        NotImplementedError: If either stride is negative.
    """

    m, n = shape
    Sm, Sn = elemstrides
    if m * n == 0:
        return np.zeros(shape, dtype=dtype)

    if min(elemstrides) < 0:
        raise NotImplementedError("negative strides are not supported")

    itemsize = dtype.itemsize
    bytestart = itemsize * start
    # With non-negative strides the last element lies
    # (m-1)*Sm + (n-1)*Sn elements past the first one, so the span that
    # covers both ends is that distance plus one element -- nothing extra
    # is transferred.
    nbytes = itemsize * ((m - 1) * Sm + (n - 1) * Sn + 1)

    temp_buf = np.zeros(nbytes, dtype=np.int8)
    cl.enqueue_copy(queue, temp_buf, data,
                    device_offset=bytestart, is_blocking=True)

    # Reinterpret the raw bytes with the region's strides (now in bytes).
    return np.ndarray(
        shape=(m, n),
        dtype=dtype,
        buffer=temp_buf.data,
        offset=0,
        strides=(itemsize * Sm, itemsize * Sn))
开发者ID:bptripp,项目名称:nengo_ocl,代码行数:31,代码来源:clraggedarray.py

示例13: get_color

	def get_color(self, img):
		"""Return the average colour of *img* as three integers.

		The image is converted to RGBA and uploaded as an OpenCL image.  The
		``get_color`` kernel writes four int32 values per work group, which
		are summed on the host.  The first three sums divided by the fourth
		are returned as an integer array.

		Args:
			img: Image object providing ``convert``, ``width`` and ``height``.

		Returns:
			Integer array with the three averaged colour components.
		"""
		# OpenCL only supports RGBA images, not RGB, so add an alpha channel
		src = np.array(img.convert('RGBA'))
		# Chained assignment: sets src.shape and binds w and h in one go.
		# NOTE(review): this relabels the array as (width, height, 4) without
		# transposing -- confirm that is what the kernel expects.
		src.shape = w, h, _ = img.width, img.height, 4

		w = int(w * self.SCALE_FACTOR)
		h = int(h * self.SCALE_FACTOR)

		local_size = self.max_work_item_sizes
		global_size = (math.ceil(h / local_size[0]), math.ceil(w / local_size[1]))

		total_work_groups = global_size[0] * global_size[1]

		mf = cl.mem_flags
		src_buf = cl.image_from_array(self.ctx, src, 4, norm_int=True)

		# Four int32 slots per work group.
		out = np.zeros(4 * total_work_groups, dtype=np.int32)
		out_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, size=out.nbytes)

		kernel = self.prg.get_color
		kernel.set_scalar_arg_dtypes([None, None, np.uint32, np.uint32])
		kernel(self.queue, global_size, local_size, src_buf, out_buf, w, h, g_times_l=True)

		cl.enqueue_copy(self.queue, dest=out, src=out_buf, is_blocking=True)

		# this sum takes .1 ms at 3440x1440, don't even bother OpenCL-ifying it
		# reshape(-1, 4) keeps the row count an integer; `shape[0] / 4` is a
		# float under true division and is rejected as a dimension.
		resized_out = out.reshape(-1, 4)
		summed_out = np.sum(resized_out, axis=0)

		# NOTE(review): a zero fourth sum makes this a division by zero.
		avg_out = (summed_out / summed_out[3])[:3].astype(int)

		return avg_out
开发者ID:karmeleon,项目名称:LEDBacklightPi,代码行数:32,代码来源:cl_mean_extractor.py

示例14: likelihood

    def likelihood(self, outcomes, modelparams, expparams):
        """Compute the likelihood array on the device.

        The superclass implementation is called first so call counting stays
        in one place.  Model and experiment parameters are converted to
        float32 and uploaded, the ``cos_model`` kernel fills an
        ``(n_models, n_experiments)`` array, and that array is turned into
        the final result by ``FiniteOutcomeModel.pr0_to_likelihood_array``.
        """
        super(AcceleratedPrecessionModel, self).likelihood(outcomes, modelparams, expparams)

        # A one-dimensional parameter array gets a trailing axis.
        if len(modelparams.shape) == 1:
            modelparams = modelparams[..., np.newaxis]

        mps = modelparams.astype(np.float32)
        eps = expparams.astype(np.float32)
        n_models, n_experiments = mps.shape[0], eps.shape[0]

        # Host array that receives the kernel output.
        pr0 = np.empty((n_models, n_experiments), dtype=mps.dtype)

        flags = cl.mem_flags
        input_flags = flags.READ_ONLY | flags.COPY_HOST_PTR
        mps_buf = cl.Buffer(self._ctx, input_flags, hostbuf=mps)
        eps_buf = cl.Buffer(self._ctx, input_flags, hostbuf=eps)
        dest_buf = cl.Buffer(self._ctx, flags.WRITE_ONLY, pr0.nbytes)

        # Global work size is the shape of pr0.
        self._prg.cos_model(self._queue, pr0.shape, None,
                            np.int32(n_experiments), mps_buf, eps_buf, dest_buf)

        # Fetch the result, then free the device memory.
        cl.enqueue_copy(self._queue, pr0, dest_buf)
        for device_buf in (mps_buf, eps_buf, dest_buf):
            device_buf.release()

        return FiniteOutcomeModel.pr0_to_likelihood_array(outcomes, pr0)
开发者ID:QInfer,项目名称:python-qinfer,代码行数:34,代码来源:gpu_models.py

示例15: eval

    def eval(self, pars):
        """Evaluate the model for *pars*, averaged over radius and length.

        A ``GaussianDispersion`` is built for each entry of
        ``OneDGpuCylinder.PD_PARS``.  For every (radius, length) weight pair
        the ``OneDCylKernel`` kernel is run and its result accumulated with
        the pair's weight times ``radius**2 * length``.  The accumulated
        value is rescaled by ``norm/vol`` when both are non-zero, divided by
        ``norm``, and ``pars['background']`` is added.
        """
        _ctx, queue = card()
        radius, length = [
            GaussianDispersion(int(pars[base+'_pd_n']), pars[base+'_pd'], pars[base+'_pd_nsigma'])
            for base in OneDGpuCylinder.PD_PARS
        ]

        # Weight points for each dispersed parameter.
        radius.value, radius.weight = radius.get_weights(pars['radius'], 0, 10000, True)
        length.value, length.weight = length.get_weights(pars['length'], 0, 10000, True)

        total, norm, vol = 0.0, 0.0, 0.0
        sub = pars['sldCyl'] - pars['sldSolv']

        # Scalars are passed in the same precision as self.q.
        if self.q.dtype == np.dtype('float32'):
            real = np.float32
        else:
            real = np.float64

        for r in xrange(len(radius.weight)):
            for l in xrange(len(length.weight)):
                self.prg.OneDCylKernel(queue, self.q.shape, None, self.q_b, self.res_b, real(sub),
                                       real(length.value[l]), real(radius.value[r]), real(pars['scale']),
                                       np.uint32(self.q.size), real(pars['uplim']), real(pars['bolim']))
                cl.enqueue_copy(queue, self.res, self.res_b)

                pair_weight = radius.weight[r]*length.weight[l]
                radius_sq = pow(radius.value[r], 2)
                total += pair_weight*self.res*radius_sq*length.value[l]
                vol += pair_weight*radius_sq*length.value[l]
                norm += pair_weight

        if vol != 0.0 and norm != 0.0:
            total *= norm/vol

        return total/norm + pars['background']
开发者ID:HMP1,项目名称:Sasmodels,代码行数:31,代码来源:code_cylinder_f.py


注:本文中的pyopencl.enqueue_copy函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。