This article collects typical usage examples of the pyopencl.enqueue_read_buffer function in Python. If you have been wondering exactly what enqueue_read_buffer does and how to use it, the hand-picked code samples below should help.
The following shows 15 code examples of enqueue_read_buffer, sorted by popularity by default.
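All of the examples share the same basic pattern: allocate a device buffer, enqueue a kernel that writes into it, then call enqueue_read_buffer(queue, buffer, host_array) to copy the result back into a NumPy array, usually followed by .wait() (or is_blocking=True) so the host does not touch the data before the transfer has finished. Here is a minimal, self-contained sketch of that pattern; the double_it kernel and the test data are made up purely for illustration:

import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags

a = np.arange(16, dtype=np.float32)
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)  # input, copied to the device
out_buf = cl.Buffer(ctx, mf.WRITE_ONLY, a.nbytes)                   # uninitialized output buffer

prg = cl.Program(ctx, """
__kernel void double_it(__global const float *a, __global float *out) {
    int i = get_global_id(0);
    out[i] = 2.0f * a[i];
}
""").build()

prg.double_it(queue, a.shape, None, a_buf, out_buf)      # enqueue the kernel

result = np.empty_like(a)
cl.enqueue_read_buffer(queue, out_buf, result).wait()    # copy device -> host, block until done
print(result)

Note that enqueue_read_buffer (and its counterpart enqueue_write_buffer) is PyOpenCL's older transfer API; more recent releases deprecate it in favor of the unified cl.enqueue_copy(queue, dest, src), so the examples below are most typical of older code bases.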
Example 1: map_function
def map_function(data):
    proc = subprocess.Popen(["../bin/get-host-platform-device.sh"], stdout=subprocess.PIPE, shell=True)
    (proc_out, err) = proc.communicate()
    [SPARKCL_PLATFORM, SPARKCL_DEVICE] = proc_out.split()
    KERNEL_CODE = """
    __kernel void ArraySum(__global float *A, __global float *B, __global float *C){
        int i = get_global_id(0);
        C[i] = A[i] + B[i];
    }
    """
    cl_device = cl.get_platforms()[int(SPARKCL_PLATFORM)].get_devices()[int(SPARKCL_DEVICE)]
    ctx = cl.Context([cl_device])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, KERNEL_CODE).build()
    kernel = prg.ArraySum
    mf = cl.mem_flags
    np_data = []
    np_data.append(np.array(data[0]).astype(np.float32))
    np_data.append(np.array(data[1]).astype(np.float32))
    data_buf = []
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[0]))
    data_buf.append(cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_data[1]))
    result = np.zeros((5, )).astype(np.float32)
    result_buf = cl.Buffer(ctx, mf.WRITE_ONLY, result.nbytes)
    kernel(queue, (5,), None, data_buf[0], data_buf[1], result_buf)
    cl.enqueue_read_buffer(queue, result_buf, result).wait()
    return result
Example 2: mineThread
def mineThread(self):
    for data in self.qr:
        for i in range(data.iterations):
            offset = (unpack('I', data.base[i])[0],) if self.GOFFSET else None
            self.kernel.search(
                self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                data.state[0], data.state[1], data.state[2], data.state[3],
                data.state[4], data.state[5], data.state[6], data.state[7],
                data.state2[1], data.state2[2], data.state2[3],
                data.state2[5], data.state2[6], data.state2[7],
                data.base[i],
                data.f[0], data.f[1], data.f[2], data.f[3],
                data.f[4], data.f[5], data.f[6], data.f[7],
                self.output_buf, global_offset=offset)
            cl.enqueue_read_buffer(self.commandQueue, self.output_buf,
                                   self.output, is_blocking=False)
            self.commandQueue.finish()

            # The OpenCL code will flag the last item in the output buffer
            # when it finds a valid nonce. If that's the case, send it to
            # the main thread for postprocessing and clean the buffer
            # for the next pass.
            if self.output[self.WORKSIZE]:
                reactor.callFromThread(self.postprocess,
                                       self.output.copy(), data.nr)
                self.output.fill(0)
                cl.enqueue_write_buffer(self.commandQueue, self.output_buf,
                                        self.output, is_blocking=False)
Example 3: execute
def execute(self):
    self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.b_buf, self.dest_buf)
    c = numpy.empty_like(self.a)
    cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
    print "a", self.a
    print "b", self.b
    print "c", c
Example 4: exchange_boundary_e
def exchange_boundary_e(s):
    for queue, eh_fields, tmpf in zip(s.queues, s.eh_fields_gpus, s.tmpfs)[1:]:
        cl.enqueue_read_buffer(queue, eh_fields[1], tmpf[0])  # ey_gpu
        cl.enqueue_read_buffer(queue, eh_fields[2], tmpf[1])  # ez_gpu

    for queue, eh_fields, tmpf, offset in zip(s.queues[:-1], s.eh_fields_gpus[:-1], s.tmpfs[1:], s.offsets[:-1]):
        cl.enqueue_write_buffer(queue, eh_fields[1], tmpf[0], offset)
        cl.enqueue_write_buffer(queue, eh_fields[2], tmpf[1], offset)
Example 5: test_that_python_args_fail
def test_that_python_args_fail(ctx_factory):
    context = ctx_factory()

    prg = cl.Program(context, """
        __kernel void mult(__global float *a, float b, int c)
        { a[get_global_id(0)] *= (b+c); }
        """).build()

    a = np.random.rand(50000)
    queue = cl.CommandQueue(context)

    mf = cl.mem_flags
    a_buf = cl.Buffer(context, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

    knl = cl.Kernel(prg, "mult")
    try:
        knl(queue, a.shape, None, a_buf, 2, 3)
        assert False, "PyOpenCL should not accept bare Python types as arguments"
    except cl.LogicError:
        pass

    try:
        prg.mult(queue, a.shape, None, a_buf, float(2), 3)
        assert False, "PyOpenCL should not accept bare Python types as arguments"
    except cl.LogicError:
        pass

    prg.mult(queue, a.shape, None, a_buf, np.float32(2), np.int32(3))

    a_result = np.empty_like(a)
    cl.enqueue_read_buffer(queue, a_buf, a_result).wait()
Example 6: transform_uint32
def transform_uint32(self, data_np,
                     flip_x=False, flip_y=False, swap_xy=False,
                     out=None):
    height, width = data_np.shape[:2]
    new_ht, new_wd = height, width
    if swap_xy:
        new_ht, new_wd = width, height
    new_size = [new_ht, new_wd] + list(data_np.shape[2:])

    mf = cl.mem_flags

    # create OpenCL buffers on devices
    data_np = np.ascontiguousarray(data_np)
    src_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                        hostbuf=data_np)
    dst_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, data_np.nbytes)

    evt = self.program.image_transform_uint32(self.queue, [height, width], None,
                                              src_buf, dst_buf,
                                              np.int32(width), np.int32(height),
                                              np.int32(flip_x), np.int32(flip_y),
                                              np.int32(swap_xy))

    if out is None:
        out = np.empty_like(data_np).reshape(new_size)
    cl.enqueue_read_buffer(self.queue, dst_buf, out).wait()

    return out
Example 7: test_opencl_0
def test_opencl_0(zz, a, b, c_result):
    for platform in cl.get_platforms():
        for device in [platform.get_devices()[1]]:
            print("===============================================================")
            print("Platform name:", platform.name)
            print("Platform profile:", platform.profile)
            print("Platform vendor:", platform.vendor)
            print("Platform version:", platform.version)
            print("---------------------------------------------------------------")
            print("Device name:", device.name)
            print("Device type:", cl.device_type.to_string(device.type))
            print("Device memory: ", device.global_mem_size//1024//1024, 'MB')
            print("Device max clock speed:", device.max_clock_frequency, 'MHz')
            print("Device compute units:", device.max_compute_units)

            # Simple speed test
            ctx = cl.Context([device])
            queue = cl.CommandQueue(ctx,
                    properties=cl.command_queue_properties.PROFILING_ENABLE)

            mf = cl.mem_flags
            a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
            b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
            dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)

            prg = cl.Program(ctx, """
                __kernel void sum(__global const double *a,
                                  __global const double *b, __global double *c)
                {
                    int loop;
                    int gid = get_global_id(0);
                    for(loop=0; loop<%s;loop++)
                    {
                        c[gid] = a[gid] + b[gid];
                        c[gid] = c[gid] * (a[gid] + b[gid]);
                        c[gid] = c[gid] * (a[gid] / 2);
                        c[gid] = log(exp(c[gid]));
                    }
                }
                """ % (zz)).build()

            exec_evt = prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf)
            exec_evt.wait()
            elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)

            print("Execution time of test: %g s" % elapsed)

            c = numpy.empty_like(a)
            cl.enqueue_read_buffer(queue, dest_buf, c).wait()
            error = 0
            for i in range(zz):
                if c[i] != c_result[i]:
                    print("c_i: ", c[i], " c_results_i: ", c_result[i])
                    print("diff: ", numpy.abs(c[i] - c_result[i]))
                    error = 1
            if error:
                print("Results don't match!!")
            else:
                print("Results OK")
Example 8: update
def update(self, sub_pos, angle, min_dist, max_dist, width, in_weight, out_weight):
    '''
    Perform one update on the probabilities by using the evidence that
    the sub is at position sub_pos, the target is seen at an absolute heading
    of `angle` and is most likely between min_dist and max_dist away.
    in_weight gives the chance that for every point in the region,
    if the buoy is there then we would get this result
    i.e. in_weight = P(this measurement | buoy at point p) for p in our region
    out_weight is the same but for points outside the region
    '''
    n, e = sub_pos

    cl_program.evidence(cl_queue, self.norths.shape, None,
                        self.norths_buf, self.easts_buf, self.prob_buf,
                        float32(n), float32(e),
                        float32(radians(angle)),
                        float32(min_dist**2),
                        float32(max_dist**2),
                        float32(width),
                        float32(in_weight),
                        float32(out_weight))

    #TODO ?
    cl.enqueue_read_buffer(cl_queue, self.prob_buf, self.probabilities).wait()

    # Normalize
    total_prob = numpy.sum(self.probabilities)
    self.probabilities /= total_prob

    cl.enqueue_write_buffer(cl_queue, self.prob_buf, self.probabilities)
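The docstring above describes a standard Bayesian grid update: the kernel scales every cell's probability by a likelihood that equals in_weight inside the observed region and out_weight outside it, and the host then reads the grid back and renormalizes it. As a rough host-side sketch of the same idea (simplified to a distance band only and with all names illustrative; the real kernel also tests the heading and width):

import numpy as np

def evidence_update(probs, norths, easts, sub_n, sub_e,
                    min_dist, max_dist, in_weight, out_weight):
    # Squared distance from the sub to every grid cell.
    d2 = (norths - sub_n)**2 + (easts - sub_e)**2
    # Likelihood of the measurement: in_weight inside the distance band,
    # out_weight everywhere else (heading/width test omitted in this sketch).
    likelihood = np.where((d2 >= min_dist**2) & (d2 <= max_dist**2),
                          in_weight, out_weight)
    posterior = probs * likelihood       # Bayes: prior * likelihood
    return posterior / posterior.sum()   # renormalize so the grid sums to 1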
Example 9: execute
def execute(self, settings):
    self.program.mandel(self.queue, (self.c_real.shape[0], ), None,
                        self.real_buf, self.imag_buf, self.depth_buf,
                        self.dest_buf)
    counts = np.zeros(settings.dim**2, dtype=np.int32)
    cl.enqueue_read_buffer(self.queue, self.dest_buf, counts).wait()
    return counts.reshape([settings.dim, settings.dim])
Example 10: transform
def transform(self):
    """Performs the computation."""
    # Prepare the input and output memory
    mf = cl.mem_flags

    msg = np.char.array(self.Word_buffer)
    len_array = np.array(self.len).astype(np.int32)
    Hexdigest_array = np.char.array(['']*41*(len(len_array)))

    print msg
    print msg.nbytes
    print len_array
    print len_array.nbytes
    print Hexdigest_array
    print Hexdigest_array.nbytes

    # Allocate device memory
    msg_buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, msg.nbytes, msg)
    len_buf = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, len_array.nbytes, len_array)
    Hexdigest_buf = cl.Buffer(self.ctx, mf.WRITE_ONLY, Hexdigest_array.nbytes)

    # Start OpenCL operation and wait for it to finish
    time1 = datetime.datetime.now()
    self.prg.sha1(self.queue, (len(len_array),), None, msg_buf, len_buf, Hexdigest_buf)
    cl.enqueue_read_buffer(self.queue, Hexdigest_buf, Hexdigest_array).wait()
    time2 = datetime.datetime.now()
    print "Execution time OpenCL sha1: " + repr((time2 - time1).microseconds/1000) + "ms"

    # Convert the result into strings
    for j in range(0, len(Hexdigest_array)/41):
        self.result.append(''.join(Hexdigest_array[j*41 + 0:j*41 + 41]))
Example 11: execute
def execute(self):
    self.program.part1(self.queue, self.a.shape, None, self.a_buf, self.b_buf, self.dest_buf)
    c = numpy.array(range(10), dtype=numpy.uint32)
    cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
    print "a", self.a
    print "b", self.b
    print "c", c
Example 12: execute
def execute(self, *args, **kwargs):
    self.load_data(*args, **kwargs)
    self.program.program__(self.queue, self.a.shape, None,
                           self.a_buf, self.b_buf, self.dest_buf)
    c = np.empty_like(self.a)
    cl.enqueue_read_buffer(self.queue, self.dest_buf, c).wait()
    return c
Example 13: execute
def execute(self):
    """ This handles the actual execution for the processing, which would
    get executed on each request - this is where we care about the
    performance
    """
    timing.timings.start("execute")

    # Start the program
    self.program.worker(self.queue, self.data1.shape, None, self.data1_buf, self.data2_buf, self.dest_buf)

    # Get an empty numpy array in the shape of the original data
    result = numpy.empty_like(self.data1)

    # Wait for result
    cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

    # show timing info
    timing.timings.stop("execute")
    finish = timing.timings.timings["execute"]["timings"][-1]
    print "<<< DONE in %s" % (finish)

    # Open data file to append to
    data_file = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../data.csv"), "a")
    data_file.write("PyOpenCl %s,%s,%s,%s\n" % (process_type, finish, num_records, num_calculations))
    data_file.close()
Example 14: do_opencl_pow
def do_opencl_pow(hash, target):
    output = numpy.zeros(1, dtype=[('v', numpy.uint64, 1)])
    if (ctx == False):
        return output[0][0]

    data = numpy.zeros(1, dtype=hash_dt, order='C')
    data[0]['v'] = ("0000000000000000" + hash).decode("hex")
    data[0]['target'] = target

    hash_buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=data)
    dest_buf = cl.Buffer(ctx, cl.mem_flags.WRITE_ONLY, output.nbytes)

    kernel = program.kernel_sha512
    worksize = kernel.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, cl.get_platforms()[0].get_devices()[1])

    kernel.set_arg(0, hash_buf)
    kernel.set_arg(1, dest_buf)

    start = time.time()
    progress = 0
    globamt = worksize*2000

    while output[0][0] == 0:
        kernel.set_arg(2, pack("<Q", progress))
        cl.enqueue_nd_range_kernel(queue, kernel, (globamt,), (worksize,))
        cl.enqueue_read_buffer(queue, dest_buf, output)
        queue.finish()
        progress += globamt
        sofar = time.time() - start
        print sofar, progress / sofar, "hashes/sec"

    taken = time.time() - start
    print progress, taken
    return output[0][0]
Example 15: reduce_flatrot
def reduce_flatrot():
    sums = np.empty((8, 4), 'f')
    evt = program.float4_sum(queue, (64*8,), (64,),
                             reduce_buf, reduce_scratch,
                             qxdyqz_buf, np.int32(length))
    cl.enqueue_read_buffer(queue, reduce_buf, sums).wait()
    return sums.sum(0)