本文整理汇总了Python中nengo_ocl.clraggedarray.CLRaggedArray.from_arrays方法的典型用法代码示例。如果您正苦于以下问题：Python CLRaggedArray.from_arrays方法的具体用法？Python CLRaggedArray.from_arrays怎么用？Python CLRaggedArray.from_arrays使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nengo_ocl.clraggedarray.CLRaggedArray的用法示例。
在下文中一共展示了CLRaggedArray.from_arrays方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: RaggedArray
# 需要导入模块: from nengo_ocl.clraggedarray import CLRaggedArray [as 别名]
# 或者: from nengo_ocl.clraggedarray.CLRaggedArray import from_arrays [as 别名]
def RaggedArray(self, listofarrays, **kwargs):
    """Build a ``CLRaggedArray`` from host arrays on this object's queue.

    Thin convenience wrapper: forwards ``self.queue``, ``listofarrays`` and
    any keyword arguments unchanged to ``CLRaggedArray.from_arrays`` and
    returns whatever that call returns.

    Parameters
    ----------
    listofarrays
        The host-side arrays to pack; passed through as-is.
    **kwargs
        Extra options forwarded verbatim to ``CLRaggedArray.from_arrays``
        (other examples in this file pass ``dtype`` and ``align``).
    """
    return CLRaggedArray.from_arrays(self.queue, listofarrays, **kwargs)
示例2: test_speed
# 需要导入模块: from nengo_ocl.clraggedarray import CLRaggedArray [as 别名]
# 或者: from nengo_ocl.clraggedarray.CLRaggedArray import from_arrays [as 别名]
def test_speed(rng):
    """Time the ragged gather-GEMV plans and, if installed, clBLAS GEMV.

    Generates ``k`` random float32 problems (matrix ``A`` of shape (m, n),
    vector ``X`` of length n, vector ``Y`` of length m, with m and n drawn
    from ``rng.randint(100, 1000)``), runs the plans chosen by
    ``plan_ragged_gather_gemv`` and prints the elapsed time. When
    ``pyopencl_blas`` can be imported, the same problems are also run through
    ``pyopencl_blas.gemv`` and that time is printed for comparison.

    Parameters
    ----------
    rng
        Random source exposing ``randint`` and ``uniform`` (presumably a
        NumPy ``RandomState`` test fixture -- confirm against the test setup).

    Notes
    -----
    ``cl``, ``ctx``, ``CLRA``, ``RA``, ``Timer`` and
    ``plan_ragged_gather_gemv`` are taken from the enclosing module; they are
    not defined in this function. Nothing is asserted: this is a benchmark
    that only prints timings.
    """
    try:
        import pyopencl_blas
    except ImportError:
        # clBLAS comparison is optional; skip it when the binding is absent.
        pyopencl_blas = None

    # Number of independent GEMV problems (smaller values such as 100, 32 or
    # 16 are handy for quick runs).
    k = 300

    # NOTE: keep the draw order (all ms, then all ns, then A, X, Y) so the
    # generated data stays the same for a given rng state.
    ms = [rng.randint(100, 1000) for _ in range(k)]
    ns = [rng.randint(100, 1000) for _ in range(k)]
    aa = [rng.uniform(-1, 1, size=(m, n)).astype('float32')
          for m, n in zip(ms, ns)]
    xx = [rng.uniform(-1, 1, size=n).astype('float32') for n in ns]
    yy = [rng.uniform(-1, 1, size=m).astype('float32') for m in ms]

    # One A index and one X index per output: problem i uses A[i] and X[i].
    ajs = [np.int32(i) for i in range(k)]
    xjs = [np.int32(i) for i in range(k)]

    alpha = 1.0
    beta = 1.0

    # -- prepare initial conditions on device
    queue = cl.CommandQueue(ctx)
    clA = CLRA.from_arrays(queue, aa)
    clX = CLRA.from_arrays(queue, xx)
    clY = CLRA.from_arrays(queue, yy)
    A_js = RA(ajs, dtype=np.int32)
    X_js = RA(xjs, dtype=np.int32)

    # -- run cl computation
    prog = plan_ragged_gather_gemv(
        queue, alpha, clA, A_js, clX, X_js, beta, clY)
    plans = prog.choose_plans()

    print('')
    print('-' * 5 + ' Plans ' + '-' * 45)
    for plan in plans:
        print(plan)

    with Timer() as timer:
        for plan in plans:
            plan()
    print("nengo_ocl: %0.3f" % timer.duration)

    # -- speed test in ocl blas
    if not pyopencl_blas:
        return

    def array(a):
        """Copy host array ``a`` into a new device array on ``queue``."""
        cla = cl.array.Array(queue, a.shape, a.dtype)
        cla.set(a)
        return cla

    # Benchmark switch: False spreads the calls over many queues, True
    # issues every call on the single shared queue.
    use_single_queue = False

    pyopencl_blas.setup()
    try:
        clAs = [array(a) for a in aa]
        clXs = [array(x.ravel()) for x in xx]
        clYs = [array(y.ravel()) for y in yy]

        queues = [cl.CommandQueue(ctx) for _ in range(k)]

        # Make sure the uploads above are done before the clock starts.
        queue.finish()

        with Timer() as timer:
            if use_single_queue:
                for A, X, Y in zip(clAs, clXs, clYs):
                    pyopencl_blas.gemv(queue, A, X, Y)
                queue.finish()
            else:
                # use multiple parallel queues
                events = []
                for i, (A, X, Y) in enumerate(zip(clAs, clXs, clYs)):
                    q = queues[i % len(queues)]
                    events.append(pyopencl_blas.gemv(q, A, X, Y))
                for q in queues:
                    q.flush()
                cl.wait_for_events(events)
        print("clBLAS: %0.3f" % timer.duration)
    finally:
        # Release clBLAS even if a call above raised; setup() was previously
        # never paired with a teardown().
        pyopencl_blas.teardown()
示例3: block_impl
# 需要导入模块: from nengo_ocl.clraggedarray import CLRaggedArray [as 别名]
# 或者: from nengo_ocl.clraggedarray.CLRaggedArray import from_arrays [as 别名]
#.........这里部分代码省略.........
if (i < ${shape0} && j == 0)
ybuf[i] = ${float_alpha} * (sums[i][0] + sums[i][1]);
}
"""
text = as_ascii(Template(text, output_encoding='ascii').render(**textconf))
kernel = cl.Program(p.queue.context, text).build().fn
kernel.set_args(*[arr.data for arr in full_args])
plan = Plan(p.queue, kernel, gsize, lsize,
name='clra_gemv.block_impl',
tag=p.tag,
bw_per_call=bw_from_geometry(p.geometry, items),
flops_per_call=flops_from_geometry(p.geometry, items),
)
plan.full_args = full_args # prevent GC the args
plan.description = p.geometry_summary(items)
plan.Ybuf = clYbuf
# --- Reduce kernel
align = False
Nreduce = len(Yshape0s_reduce)
clYshape0s_reduce = to_device(
p.queue, np.array(Yshape0s_reduce, dtype=np.int32))
clYinstride0s_reduce = to_device(
p.queue, np.array(Yinstride0s_reduce, dtype=np.int32))
clYinstarts_reduce = to_device(
p.queue, np.array(Yinstarts_reduce, dtype=np.int32))
clYstride0s_reduce = to_device(
p.queue, np.array(Ystride0s_reduce, dtype=np.int32))
clYstarts_reduce = to_device(
p.queue, np.array(Ystarts_reduce, dtype=np.int32))
clYbufinds_reduce = CLRaggedArray.from_arrays(
p.queue, Ybufinds_reduce, dtype=np.int32, align=align)
assert len(clYbufinds_reduce) == Nreduce
assert (clYbufinds_reduce.shape1s == 1).all()
textconf_reduce = dict(
Ybuf=clYbuf,
Yin=p.Y_in,
Y=p.Y,
float_beta=p.float_beta,
float_gamma=p.float_gamma,
)
full_args_reduce = (
clYshape0s_reduce,
clYbufinds_reduce.cl_shape0s,
clYbufinds_reduce.cl_starts,
clYbufinds_reduce.cl_buf,
clYbuf,
clYinstride0s_reduce,
clYinstarts_reduce,
p.Y_in.cl_buf,
clYstride0s_reduce,
clYstarts_reduce,
p.Y.cl_buf,
)
lsize_reduce = None
gsize_reduce = (block_y, Nreduce)
text_reduce = """
__kernel void reduce(
__global const int *shape0s,