当前位置: 首页>>代码示例>>Python>>正文


Python pyopencl.enqueue_nd_range_kernel函数代码示例

本文整理汇总了Python中pyopencl.enqueue_nd_range_kernel函数的典型用法代码示例。如果您正苦于以下问题:Python enqueue_nd_range_kernel函数的具体用法?Python enqueue_nd_range_kernel怎么用?Python enqueue_nd_range_kernel使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了enqueue_nd_range_kernel函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: do_opencl_pow

def do_opencl_pow(hash, target):
	output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
	if (ctx == False):
		return output[0][0]
	
	data = numpy.zeros(1, dtype=hash_dt, order='C')
	data[0]['v'] = ("0000000000000000" + hash).decode("hex")
	data[0]['target'] = target
	
	hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
	dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)
	
	kernel = program.kernel_sha512
	worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, cl.get_platforms()[0].get_devices()[1])

	kernel.set_arg(0, hash_buf)
	kernel.set_arg(1, dest_buf)

	start = time.time()
	progress = 0
	globamt = worksize*2000

	while output[0][0] == 0:
		kernel.set_arg(2, pack("<Q", progress))
		cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
		cl.enqueue_read_buffer(queue, dest_buf, output)
		queue.finish()
		progress += globamt
		sofar = time.time() - start
		print sofar, progress / sofar, "hashes/sec"
	taken = time.time() - start
	print progress, taken
	return output[0][0]
开发者ID:N0U,项目名称:PyBitmessage,代码行数:33,代码来源:openclpow.py

示例2: do_opencl_pow

def do_opencl_pow(hash, target):
    """Run ``program.kernel_sha512`` in batches until it reports a non-zero value.

    The kernel is given a one-element input record (fields ``v`` and
    ``target``), a one-element uint64 output buffer, and the number of work
    items already tried.  Every batch launches ``worksize * 2000`` work items
    and reads the output buffer back; the loop ends as soon as the value read
    back is non-zero.

    Args:
        hash: Hex string; 16 zero hex digits are prepended and the result is
            hex-decoded (Python 2 ``str.decode("hex")``) into the ``v`` field.
        target: Value stored in the ``target`` field of the input record.

    Returns:
        The uint64 value written by the kernel (presumably the proof-of-work
        nonce -- confirm against the kernel source), or 0 when
        ``ctx == False``.
    """
    global ctx, queue, program, gpus, hash_dt

    output = numpy.zeros(1, dtype=[("v", numpy.uint64, 1)])
    if ctx == False:
        # No usable OpenCL context: hand back the zero-initialised result.
        return output[0][0]

    data = numpy.zeros(1, dtype=hash_dt, order="C")
    data[0]["v"] = ("0000000000000000" + hash).decode("hex")
    data[0]["target"] = target

    hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, gpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    # enqueue_read_buffer is a long-deprecated alias that newer pyopencl
    # releases do not provide; use it when present and otherwise fall back to
    # enqueue_copy(queue, dest, src), which performs the same device-to-host
    # transfer.
    legacy_read = getattr(cl, "enqueue_read_buffer", None)

    progress = 0
    globamt = worksize * 2000

    while output[0][0] == 0:
        # Argument 2 tells the kernel how many work items earlier batches covered.
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
        if legacy_read is not None:
            legacy_read(queue, dest_buf, output)
        else:
            cl.enqueue_copy(queue, output, dest_buf)
        queue.finish()
        progress += globamt
    return output[0][0]
开发者ID:Basti1993,项目名称:PyBitmessage,代码行数:35,代码来源:openclpow.py

示例3: do_opencl_pow

def do_opencl_pow(hash, target):
    """Run ``program.kernel_sha512`` in batches until it reports a non-zero value.

    The kernel is given a one-element input record (fields ``v`` and
    ``target``), a one-element uint64 output buffer, and the number of work
    items already tried.  Every batch launches ``worksize * 2000`` work items
    and reads the output buffer back; the loop ends as soon as the value read
    back is non-zero or the module-level ``shutdown`` flag becomes non-zero.

    Args:
        hash: Hex string; 16 zero hex digits are prepended and the result is
            hex-decoded (Python 2 ``str.decode("hex")``) into the ``v`` field.
        target: Value stored in the ``target`` field of the input record.

    Returns:
        The uint64 value written by the kernel (presumably the proof-of-work
        nonce -- confirm against the kernel source), or 0 when ``enabledGpus``
        is empty.

    Raises:
        Exception: with message "Interrupted" when ``shutdown`` is non-zero
            once the loop has ended.
    """
    output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if len(enabledGpus) == 0:
        # No GPU enabled: hand back the zero-initialised result.
        return output[0][0]

    data = numpy.zeros(1, dtype=hash_dt, order='C')
    data[0]['v'] = ("0000000000000000" + hash).decode("hex")
    data[0]['target'] = target

    hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, enabledGpus[0])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    # enqueue_read_buffer is a long-deprecated alias that newer pyopencl
    # releases do not provide; use it when present and otherwise fall back to
    # enqueue_copy(queue, dest, src), which performs the same device-to-host
    # transfer.
    legacy_read = getattr(cl, "enqueue_read_buffer", None)

    progress = 0
    globamt = worksize * 2000

    while output[0][0] == 0 and shutdown == 0:
        # Argument 2 tells the kernel how many work items earlier batches covered.
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
        if legacy_read is not None:
            legacy_read(queue, dest_buf, output)
        else:
            cl.enqueue_copy(queue, output, dest_buf)
        queue.finish()
        progress += globamt
    if shutdown != 0:
        raise Exception("Interrupted")
    return output[0][0]
开发者ID:Bitmessage,项目名称:PyBitmessage,代码行数:35,代码来源:openclpow.py

示例4: max_length_real4

def max_length_real4(ipt):
    """Run the ``_lengthkern_real4`` kernel over ``ipt`` and max-reduce its output.

    A ``CLReal`` of ``len(ipt)`` elements receives the kernel output (one work
    item per input element, local size left to the OpenCL runtime) and is then
    passed to ``max_reduce``, whose result is returned.
    """
    element_count = len(ipt)
    lengths = CLReal(element_count)
    length_kernel = _lengthkern_real4.kern
    length_kernel.set_arg(0, ipt._buffer)
    length_kernel.set_arg(1, lengths._buffer)
    command_queue = ipt._ctrl.clqueue
    cl.enqueue_nd_range_kernel(command_queue, length_kernel, (element_count,), None)
    return max_reduce(lengths)
开发者ID:hagisgit,项目名称:SLIC,代码行数:7,代码来源:tools.py

示例5: __call__

 def __call__(self, thread_count, work_group_size, *args):
     """Compile the kernel, bind ``args`` positionally and run it to completion.

     The launch uses a one-dimensional global size of ``thread_count`` and a
     local size of ``work_group_size`` (``g_times_l=False``), and is waited on
     inside the "ParLoop kernel" timed region.
     """
     kernel = self.compile()
     for position, value in enumerate(args):
         kernel.set_arg(position, value)
     with timed_region("ParLoop kernel"):
         launch = cl.enqueue_nd_range_kernel(
             _queue,
             kernel,
             (thread_count,),
             (work_group_size,),
             g_times_l=False,
         )
         launch.wait()
开发者ID:GitPaean,项目名称:PyOP2,代码行数:7,代码来源:opencl.py

示例6: filterPrepare

 def filterPrepare(self, e, data, keys, ndata, events):
     """Enqueue the ``filterPrepare`` kernel and return its event and buffers.

     ``data`` and ``keys`` are used as-is when they are already ``cl.Buffer``
     objects; otherwise they are copied into new read-only device buffers.
     A read/write filter buffer with one ``np.bool8`` per element is
     allocated.  The new event list is appended to ``events``.

     Args:
         e: Events to wait for before the kernel runs, or ``None``.
         data: Input data; its ``size`` attribute defines the element count.
         keys: Input keys; must report the same ``size`` as ``data``.
         ndata: Ignored -- immediately overwritten with ``data.size``.
         events: Collection extended in place with the launch event.

     Returns:
         Tuple ``(e, data_buf, keys_buf, filt_buf)`` where ``e`` is a
         one-element list holding the kernel launch event.

     Raises:
         Exception: when ``keys.size`` differs from ``data.size``.
     """
     import numpy as np
     import pyopencl as cl
     flags = cl.mem_flags

     ndata = data.size
     if keys.size != ndata:
         raise Exception()

     filter_nbytes = np.bool8(False).nbytes * ndata
     host_copy_flags = flags.READ_ONLY | flags.COPY_HOST_PTR

     data_buf = data if isinstance(data, cl.Buffer) else cl.Buffer(self.ctx, host_copy_flags, hostbuf=data)
     keys_buf = keys if isinstance(keys, cl.Buffer) else cl.Buffer(self.ctx, host_copy_flags, hostbuf=keys)
     filt_buf = cl.Buffer(self.ctx, flags.READ_WRITE, filter_nbytes)

     kernel = self.prg.filterPrepare
     kernel.set_args(data_buf, keys_buf, np.uint64(ndata), np.uint8(33), np.uint8(66), filt_buf)
     global_dims = self.get_global(self.get_grid_dims(ndata))

     print("filterPrepare")
     # Only forward wait_for when there is something to wait for.
     launch_options = {} if e is None else {"wait_for": e}
     e = [cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, **launch_options)]
     events += e

     return (e, data_buf, keys_buf, filt_buf)
开发者ID:Kobtul,项目名称:documents,代码行数:34,代码来源:filter.py

示例7: prefixSumUp

 def prefixSumUp(self, e, data, ndata, data2, ndata2, events):
     """Enqueue the ``prefixSumUp`` kernel and return its event and buffers.

     ``data`` and ``data2`` are used as-is when they are already ``cl.Buffer``
     objects; otherwise they are copied into new read/write device buffers.
     The launch event is appended to ``events``.

     Args:
         e: Events the kernel must wait for, or ``None`` for no dependency.
         data: First kernel argument (host array or ``cl.Buffer``).
         ndata: Element count passed to the kernel as ``uint64``; also used
             to size the launch grid.
         data2: Third kernel argument (host array or ``cl.Buffer``).
         ndata2: Element count for ``data2``, passed as ``uint64``.
         events: Collection extended in place with the launch event.

     Returns:
         Tuple ``(e, data_buf, data2_buf)`` where ``e`` is a one-element
         tuple holding the kernel launch event.
     """
     import numpy as np
     import pyopencl as cl
     mf = cl.mem_flags

     if not isinstance(data, cl.Buffer):
         data_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=data)
     else:
         data_buf = data

     if not isinstance(data2, cl.Buffer):
         data2_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=data2)
     else:
         data2_buf = data2

     kernel = self.prg.prefixSumUp
     kernel.set_args(data_buf, np.uint64(ndata), data2_buf, np.uint64(ndata2))

     global_dims = self.get_global(self.get_grid_dims(ndata))

     print("prefixSumUp")
     # The original branch was inverted: it passed wait_for only when e was
     # None and dropped the dependency whenever events were actually supplied.
     # wait_for=None means "no dependency", so e can be forwarded as-is.
     e = (cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e),)
     events += e

     return (e, data_buf, data2_buf)
开发者ID:Kobtul,项目名称:documents,代码行数:28,代码来源:filter.py

示例8: exec_lsz_safe

 def exec_lsz_safe(self, localsize):
     """Execute the kernel once with a specific local size.

     Safe also for kernels with local variables.  The previous local size is
     remembered before ``localsize`` is applied and is restored afterwards,
     including when the launch or the queue flush raises, so a failed run
     does not leave the object with the temporary setting.

     Args:
         localsize: Local work size to use for this single launch.
     """
     oldloc = int(self._localsize)
     self.localsize = localsize
     try:
         cl.enqueue_nd_range_kernel(self._solverobj.clqueue, self._clkernel, (self.globalsize,), (self.localsize,))
         # Block until the launch has completed before restoring the setting.
         self._solverobj.clqueue.finish()
     finally:
         self.localsize = oldloc
开发者ID:hagisgit,项目名称:qcl,代码行数:8,代码来源:QclKernel.py

示例9: test_algorithm

    def test_algorithm(self):
        """Run the pbrs kernel against the CSV test bench and print the outcome.

        For a kernel name from ``self.kernelname`` the kernel is loaded, six
        test cases are read from ``test_bench_pbrs_input.csv`` /
        ``test_bench_pbrs_output.csv`` (eight lines per case, joined with
        commas), each case is pushed through the kernel, and the bytes read
        back are compared with the reference bytes.

        Returns:
            True when all six cases pass, False otherwise.  Because the
            ``return`` statements sit inside the ``for k`` loop, the result is
            decided by the first kernel name only; an empty
            ``self.kernelname`` falls through and returns None.
        """
        print "\n**************************"
        print "test_pbrs:"
        passed = 0
        buffersize_in = 188*8
        buffersize_out = 188*8
        # Device input buffer, buffersize_in * 4 bytes.
        self.inputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=buffersize_in*4)
        # Device output buffer, buffersize_out * 4 bytes.
        self.outputbuffer = cl.Buffer(self.ctx , cl.mem_flags.READ_WRITE, size=buffersize_out*4)

        for k in self.kernelname:
            kernel = self.load_kernel(self.filename, k)
            passed = 0
            # NOTE(review): the files are closed explicitly further down, not
            # via `with`, so an exception in between leaves them open.
            self.fd_input = open('test_bench_pbrs_input.csv', 'r')
            self.fd_output = open('test_bench_pbrs_output.csv', 'r')
            for j in range(0,6):
                # Host array that receives the kernel output; the size relies
                # on Python 2 integer division (buffersize_out/4).
                encoded_data = numpy.array(numpy.zeros(buffersize_out/4), dtype=numpy.uint32)
                # One test case = 8 consecutive lines of each file: the first
                # line here plus 7 more appended below, comma-separated.
                data_to_encode = string.replace(self.fd_input.readline(),'\n','')
                reference_data = string.replace(self.fd_output.readline(),'\n','')
                for i in range(0,7):
                    data_to_encode = "%s,%s" % (data_to_encode, string.replace(self.fd_input.readline(),'\n',''))
                    reference_data = "%s,%s" % (reference_data, string.replace(self.fd_output.readline(),'\n',''))

                # Parse the comma-separated values as uint8, then reinterpret
                # the same bytes as uint32 words for the device buffer.
                data_to_encode = numpy.fromstring(numpy.fromstring(data_to_encode, dtype=numpy.uint8, sep=",").tostring(), dtype=numpy.uint32)
                reference_data = numpy.fromstring(reference_data, dtype=numpy.uint8, sep=",")

                # Upload, launch with global size 8 / local size 8, download;
                # every step is waited on before the next one starts.
                cl.enqueue_copy(self.queue, self.inputbuffer, data_to_encode).wait()
                kernel.set_args(self.inputbuffer, self.outputbuffer)
                cl.enqueue_nd_range_kernel(self.queue,kernel,(8,),(8,),None ).wait()
                cl.enqueue_copy(self.queue, encoded_data, self.outputbuffer).wait()
                # Reinterpret the uint32 result as bytes for the comparison.
                encoded_data = (numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8))

                
                if encoded_data.tostring() == reference_data.tostring():
                    passed += 1
                    print "Test %d PASSED" % (j+1)
                else:
                    print "Test %d FAILED" % (j+1)
                    print "input data:"
                    print numpy.fromstring(data_to_encode.tostring(), dtype=numpy.uint8)
                    print "encoded data:"
                    print numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8)
                    print "reference data:"
                    print reference_data
                    print "error data:"
                    print (reference_data - numpy.fromstring(encoded_data.tostring(), dtype=numpy.uint8))
            print "%d pass out of 6" % passed
            self.fd_input.close()
            self.fd_output.close()
            # NOTE(review): returning here ends the `for k` loop after the
            # first kernel name; later names are never exercised -- confirm
            # whether that is intended.
            if passed == 6:
                print "All pbrs tests PASS\n"
                return True
            else:
                print "at least one pbrs test FAILED\n"
                return False
开发者ID:das-labor,项目名称:dvbt,代码行数:56,代码来源:create_pbrs_kernel.py

示例10: max_reduce_real4

def max_reduce_real4(ipt):
    """Split ``ipt`` with the ``_splitkern_real4`` kernel and max-reduce each part.

    Three ``CLReal`` outputs of ``len(ipt)`` elements are bound as kernel
    arguments 1-3 (``ipt`` itself is argument 0).  After the launch each
    output is passed to ``max_reduce``.

    Returns:
        A 3-tuple with the ``max_reduce`` result of the first, second and
        third output, in that order.
    """
    components = [CLReal(len(ipt)) for _ in range(3)]
    split_kernel = _splitkern_real4.kern
    split_kernel.set_arg(0, ipt._buffer)
    for slot, component in enumerate(components, 1):
        split_kernel.set_arg(slot, component._buffer)
    cl.enqueue_nd_range_kernel(ipt._ctrl.clqueue, split_kernel, (len(ipt),), None)
    return tuple(max_reduce(component) for component in components)
开发者ID:hagisgit,项目名称:SLIC,代码行数:11,代码来源:tools.py

示例11: prefixSum

 def prefixSum(self, e, data, keys, ndata, low, hi, events):
     """Enqueue the ``prefixSumDown`` kernel, then the matching up-sweep.

     ``data`` and ``keys`` are used as-is when they are already ``cl.Buffer``
     objects; otherwise they are copied into new read-only device buffers.
     Three read/write buffers are allocated for the kernel outputs: one
     ``uint64`` per element (``psum_buf``), one ``uint64`` per grid cell
     (``bsum_buf``) and a single ``uint64`` (``nbsum_buf``).  After the
     kernel, ``nbsum_buf`` is copied to the host; when its value is greater
     than 1 the block sums are processed by ``prefixSumDownInplace``,
     otherwise the first ``uint64`` of ``bsum_buf`` is read back as
     ``ndata2``.  Finally ``prefixSumUp`` is called.  Every event created
     here is appended to ``events``.

     Args:
         e: Events the kernel must wait for, or ``None`` for no dependency.
         data: Input data (host array or ``cl.Buffer``).
         keys: Input keys (host array or ``cl.Buffer``).
         ndata: Number of elements.
         low: Lower bound, converted with ``PrefixSum.HOST_TYPE_KEYS``.
         hi: Upper bound, converted with ``PrefixSum.HOST_TYPE_KEYS``.
         events: Collection extended in place with the created events.

     Returns:
         Tuple ``(e, data_buf, keys_buf, psum_buf, bsum_buf, nbsum_buf,
         ndata2)``.
     """
     import numpy as np
     import pyopencl as cl
     mf = cl.mem_flags

     if not isinstance(data, cl.Buffer):
         data_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=data)
     else:
         data_buf = data

     if not isinstance(keys, cl.Buffer):
         keys_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=keys)
     else:
         keys_buf = keys

     grid_dims = self.get_grid_dims(ndata)
     psumbytes = ndata * np.uint64(0).nbytes
     bsumbytes = int(np.prod(grid_dims) * np.uint64(0).nbytes)
     nbsumbytes = np.uint64(0).nbytes

     psum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, psumbytes)
     bsum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, bsumbytes)
     nbsum_buf = cl.Buffer(self.ctx, mf.READ_WRITE, nbsumbytes)

     low = PrefixSum.HOST_TYPE_KEYS(low)
     hi = PrefixSum.HOST_TYPE_KEYS(hi)

     kernel = self.prg.prefixSumDown
     kernel.set_args(data_buf, keys_buf, np.uint64(ndata), low, hi, psum_buf, bsum_buf, nbsum_buf)

     global_dims = self.get_global(grid_dims)

     print("prefixSumDown %s %s" % (str(global_dims), str(self.localDims)))
     # The original branch was inverted: it passed wait_for only when e was
     # None and dropped the dependency whenever events were actually supplied.
     # wait_for=None means "no dependency", so e can be forwarded as-is.
     e = (cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e),)
     events += e

     # Read back the single uint64 the kernel wrote to nbsum_buf.
     nbsum = np.zeros(1, dtype=np.uint64)
     events += (cl.enqueue_copy(self.queue, nbsum, nbsum_buf, wait_for=e),)

     if nbsum > 1:
         (e, bsum_buf, bsum1_buf, nbsum1_buf, ndata2) = self.prefixSumDownInplace(e, bsum_buf, nbsum.item(), events)
     else:
         ndata2 = np.zeros(1, dtype=np.uint64)
         events += (cl.enqueue_copy(self.queue, ndata2, bsum_buf, wait_for=e),)
         ndata2 = ndata2.item()
         print(ndata2)

     # NOTE(review): the return value of prefixSumUp (including its event) is
     # discarded, so the returned e does not cover the up-sweep -- confirm.
     self.prefixSumUp(e, psum_buf, ndata, bsum_buf, nbsum, events)

     return (e, data_buf, keys_buf, psum_buf, bsum_buf, nbsum_buf, ndata2)
开发者ID:Kobtul,项目名称:documents,代码行数:53,代码来源:filter.py

示例12: solve

    def solve(self,puzzle,simulations = 16384, iterations = 35, workGroupSize = 128):
        """Run the "montecarlo" kernel on ``puzzle`` and return the best solution.

        Stores the run parameters on ``self``, initialises the device buffers
        via ``initBuffers``, launches the kernel with ``simulations`` work
        items in groups of ``workGroupSize``, reads the group lengths and
        solutions back, releases the buffers and converts the solution with
        the smallest group length into a list of squares.

        Args:
            puzzle: Mapping with at least 'width' and 'height' entries; it is
                also handed to ``initBuffers``.
            simulations: Global work size of the kernel launch.
            iterations: Passed to the kernel as an ``int32`` argument.
            workGroupSize: Local work size of the kernel launch.

        Returns:
            List of dicts with keys 'X', 'Y' and 'Size', one per cell of the
            best solution whose value is not -1 when it is visited.
        """
        self.simulations = simulations
        self.iterations = iterations
        self.workGroupSize = workGroupSize
        self.workGroups = int(self.simulations / self.workGroupSize)
        self.width = np.int8(puzzle['width'])
        self.height = np.int8(puzzle['height'])
        
        # initialise buffers
        self.initBuffers(puzzle)
        
        # create the kernel and bind its seven arguments
        self.kernel = cl.Kernel(self.program,"montecarlo")
        self.kernel.set_args(self.lengthsBuffer,self.groupLengthsBuffer,self.puzzlesBuffer,self.solutionsBuffer,self.height,self.width,np.int32(self.iterations))
        
        # launch one work item per simulation; the iteration count is a kernel argument
        cl.enqueue_nd_range_kernel(self.queue,self.kernel,(self.simulations,),(self.workGroupSize,))
        
        # map the group lengths buffer (enqueue_map_buffer; its return value
        # is discarded) and fetch the host-side array for it
        cl.enqueue_map_buffer(self.queue,self.groupLengthsBuffer,cl.map_flags.WRITE,0,self.groupLengths.shape,self.groupLengths.dtype)
        self.groupLengths = self.groupLengthsBuffer.get_host_array(self.groupLengths.shape,dtype=self.groupLengths.dtype)

        # same for the solutions buffer; note the map call uses the shape of
        # self.solutionsFlattened while get_host_array uses self.solutions.shape
        cl.enqueue_map_buffer(self.queue,self.solutionsBuffer,cl.map_flags.WRITE,0,self.solutionsFlattened.shape,self.solutions.dtype)
        self.solutions = self.solutionsBuffer.get_host_array(self.solutions.shape,dtype=self.solutions.dtype)
        
        # release buffers
        self.lengthsBuffer.release()
        self.groupLengthsBuffer.release()
        self.puzzlesBuffer.release()
        self.solutionsBuffer.release()

        # get the best solution: the entry with the smallest group length,
        # copied so the clearing below does not touch self.solutions
        i = self.groupLengths.argmin()
        bestSolution = np.array(self.solutions[i])
        
        # convert solution to list format used by challenge
        solution = []
        for row in range(0,puzzle['height']):
            for col in range(0,puzzle['width']):
                if bestSolution[row][col]!=-1:
                    s = bestSolution[row][col]
                    
                    # add to solution list
                    solution.append({'X': int(col),'Y': int(row),'Size':int(s)})
                    
                    # clear the s-by-s cells covered by this entry so they are
                    # not reported again (this loop rebinds i, which is no
                    # longer needed after bestSolution was copied)
                    for i in range(0,s):
                        for j in range(0,s):
                            bestSolution[row+i][col+j]=-1
        
        return solution
开发者ID:ohlord,项目名称:cimpress,代码行数:52,代码来源:CLSolve.py

示例13: filter

    def filter(self, data, keys, low, hi, events):
        """Run the prefix sum and the ``filter`` kernel, copying results to the host.

        ``prefixSum`` provides the device buffers and the output element count
        ``ndata2``.  Host arrays and device buffers for the filtered data and
        keys are allocated with that size, the ``filter`` kernel is launched
        after the events returned by ``prefixSum``, and the results are copied
        back.  When ``PrefixSum.RETURN_FILTER == 1`` an additional filter
        buffer is passed to the kernel and copied back as well.  Every event
        created here is appended to ``events``.

        Args:
            data: Input data; its ``size`` attribute defines the element count.
            keys: Input keys.
            low: Lower bound, converted with ``PrefixSum.HOST_TYPE_KEYS``.
            hi: Upper bound, converted with ``PrefixSum.HOST_TYPE_KEYS``.
            events: Collection extended in place with the created events.

        Returns:
            Tuple ``(filt, indices, data2, keys2)`` of host arrays.  ``filt``
            stays all-False unless ``PrefixSum.RETURN_FILTER == 1``.
        """
        import numpy as np
        import pyopencl as cl
        mf = cl.mem_flags

        ndata = data.size

        (e, data_buf, keys_buf, indices_buf, bsum_buf, nbsum_buf, ndata2) = self.prefixSum(None, data, keys, ndata, low, hi, events)

        filt = np.zeros(ndata, dtype=np.bool8)
        indices = np.zeros(ndata, dtype=np.uint64)
        data2 = np.zeros(ndata2, dtype=PrefixSum.HOST_TYPE_DATA)
        keys2 = np.zeros(ndata2, dtype=PrefixSum.HOST_TYPE_KEYS)

        ndata2bytes = np.uint64(0).nbytes

        if PrefixSum.RETURN_FILTER == 1:
            filt_buf = cl.Buffer(self.ctx, mf.READ_WRITE, filt.nbytes)
        print(data2.nbytes)
        data2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, data2.nbytes)
        keys2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, keys2.nbytes)
        ndata2_buf = cl.Buffer(self.ctx, mf.READ_WRITE, ndata2bytes)

        low = PrefixSum.HOST_TYPE_KEYS(low)
        hi = PrefixSum.HOST_TYPE_KEYS(hi)

        kernel = self.prg.filter
        if PrefixSum.RETURN_FILTER == 1:
            kernel.set_args(data_buf, keys_buf, indices_buf, np.uint64(ndata), low, hi, filt_buf, data2_buf, keys2_buf, ndata2_buf)
        else:
            kernel.set_args(data_buf, keys_buf, indices_buf, np.uint64(ndata), low, hi, data2_buf, keys2_buf, ndata2_buf)

        global_dims = self.get_global(self.get_grid_dims(ndata))

        print("filter")
        # The original branch was inverted: it passed wait_for only when e was
        # None and dropped the dependency on the prefixSum events otherwise.
        # wait_for=None means "no dependency", so e can be forwarded as-is.
        e = (cl.enqueue_nd_range_kernel(self.queue, kernel, global_dims, self.localDims, wait_for=e),)
        events += e

        if PrefixSum.RETURN_FILTER == 1:
            events += (cl.enqueue_copy(self.queue, filt, filt_buf, wait_for=e),
                       cl.enqueue_copy(self.queue, indices, indices_buf, wait_for=e),
                       cl.enqueue_copy(self.queue, data2, data2_buf, wait_for=e),
                       cl.enqueue_copy(self.queue, keys2, keys2_buf, wait_for=e))
        else:
            events += (cl.enqueue_copy(self.queue, indices, indices_buf, wait_for=e),
                       cl.enqueue_copy(self.queue, data2, data2_buf, wait_for=e),
                       cl.enqueue_copy(self.queue, keys2, keys2_buf, wait_for=e))

        return (filt, indices, data2, keys2)
开发者ID:Kobtul,项目名称:documents,代码行数:52,代码来源:filter.py

示例14: _exec_chunked_unsafe

 def _exec_chunked_unsafe(self, chunksize=0):
     """Run the kernel over the leading variable in chunks.

     Unsafe for kernels with local variables.  When ``chunksize`` is positive
     the chunked execution is first prepared with it.  Each chunk sets the
     chunk index on the solver object (attribute named by ``_cnk_name``) and
     launches ``_cnksz`` work items, except that a final partial chunk
     launches only the remainder.  The queue is flushed once at the end.
     """
     if chunksize > 0:
         self._prep_chunked_exec(chunksize)
     total = self.leadingvar.length
     chunk = self._cnksz
     chunk_count = int(ceil(float(total) / float(chunk)))
     last_index = chunk_count - 1
     for index in range(chunk_count):
         launch_size = chunk
         if index == last_index:
             # Only the last chunk can be shorter than a full chunk.
             tail = total % chunk
             if tail != 0:
                 launch_size = tail
         setattr(self._solverobj, self._cnk_name, index)
         cl.enqueue_nd_range_kernel(self._solverobj.clqueue, self._clkernel, (launch_size,), None)
     self._solverobj.clqueue.finish()
开发者ID:hagisgit,项目名称:qcl,代码行数:13,代码来源:QclKernel.py

示例15: change_display

def change_display(image):
    """Run the ``add`` kernel from ``image`` into a GL buffer, then flush.

    ``image`` is copied into a read-only device buffer; the write-only target
    is a ``cl.GLBuffer`` created from the module-level ``buf``.  The GL object
    is acquired before the launch (global size ``image.shape``) and released
    afterwards; finally the command queue is finished and ``glFlush`` called.
    """
    source_buffer = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=image)
    gl_target = cl.GLBuffer(ctx, mf.WRITE_ONLY, numpy.float32(buf))

    # The GL object must be held by OpenCL while the kernel writes to it.
    cl.enqueue_acquire_gl_objects(queue, [gl_target])
    add_kernel = prog.add
    add_kernel.set_args(source_buffer, gl_target)
    cl.enqueue_nd_range_kernel(queue, add_kernel, image.shape, None)
    cl.enqueue_release_gl_objects(queue, [gl_target])

    queue.finish()
    glFlush()
开发者ID:Blother,项目名称:Python_Interop,代码行数:13,代码来源:simple_interop.py


注:本文中的pyopencl.enqueue_nd_range_kernel函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。