本文整理匯總了 Python 中 plan.Plan.full_args 方法的典型用法代碼示例。如果您正苦於以下問題:Python Plan.full_args 方法的具體用法?Python Plan.full_args 怎麼用?Python Plan.full_args 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 plan.Plan 的用法示例。
在下文中一共展示了 Plan.full_args 方法的 6 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: plan_direct
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
def plan_direct(queue, code, init, Xname, X, Y, tag=None):
    """Build a Plan whose kernel runs caller-supplied OpenCL code per item.

    The kernel source is a Mako template. For work-item ``n`` it sets up a
    pointer named ``Xname`` into the input buffer (offset by the n-th input
    start) and an output pointer (offset by the n-th output start), then
    pastes in ``init`` followed by ``code``.

    Parameters
    ----------
    queue
        Object whose ``context`` attribute is used to build the OpenCL
        program; also forwarded to ``Plan``.
    code : str
        OpenCL source spliced into the "user computations" section.
    init : str
        OpenCL source spliced into the "user declarations" section.
    Xname : str
        Identifier the user code uses for the per-item input pointer.
    X, Y
        Input and output containers. They must support ``len()`` and expose
        ``cl_starts`` and ``cl_buf`` (with ``cl_buf.ocldtype``).
        NOTE(review): presumably ragged device arrays -- confirm with caller.
    tag
        Forwarded unchanged to ``Plan``.

    Returns
    -------
    Plan
        Named ``"cl_direct"``, with global size ``(len(X),)`` and a
        ``full_args`` attribute holding the kernel's argument objects.
    """
    from . import ast_conversion

    n_items = len(X)
    assert n_items == len(Y)

    kernel_template = """
////////// MAIN FUNCTION //////////
__kernel void fn(
__global const int *${IN}starts,
__global const ${INtype} *${IN}data,
__global const int *${OUT}starts,
__global ${OUTtype} *${OUT}data
)
{
const int n = get_global_id(0);
if (n >= ${N}) return;
__global const ${INtype} *${arg} = ${IN}data + ${IN}starts[n];
__global ${OUTtype} *${OUT} = ${OUT}data + ${OUT}starts[n];
/////vvvvv USER DECLARATIONS BELOW vvvvv
${init}
/////vvvvv USER COMPUTATIONS BELOW vvvvv
${code}
// END OF FUNC: put nothing after user code, since it can return
}
"""

    # Values substituted for the ${...} placeholders in the template above.
    substitutions = {
        "init": _indent(init, 12),
        "code": _indent(code, 12),
        "N": n_items,
        "arg": Xname,
        "IN": ast_conversion.INPUT_NAME,
        "INtype": X.cl_buf.ocldtype,
        "OUT": ast_conversion.OUTPUT_NAME,
        "OUTtype": Y.cl_buf.ocldtype,
    }
    kernel_source = Template(
        kernel_template, output_encoding='ascii').render(**substitutions)

    # Order must match the kernel signature: starts/data for the input,
    # then starts/data for the output.
    kernel_args = (X.cl_starts, X.cl_buf, Y.cl_starts, Y.cl_buf)

    kernel = cl.Program(queue.context, kernel_source).build().fn
    kernel.set_args(*(buf.data for buf in kernel_args))

    plan = Plan(queue, kernel, (n_items,), lsize=None,
                name="cl_direct", tag=tag)
    # Hold references on the plan so the argument objects are not
    # garbage-collected while the kernel may still use them.
    plan.full_args = kernel_args
    return plan
示例2: _plan_template
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
#.........這裏部分代碼省略.........
m++;
if (m >= lengths[n]) {
n++;
m = 0;
if (n >= ${N}) return;
% for name, [type, offset] in ivars.items() + ovars.items() + pvars.items():
cur_${name} = in_${name} + ${offset};
% endfor
% for name, [type, offset] in pvars.items():
${name}_isvector = ${name}_shape0s[n] > 1;
if (!${name}_isvector) ${name} = *cur_${name};
% endfor
} else {
% for name, [type, offset] in ivars.items() + ovars.items():
cur_${name}++;
% endfor
% for name, [type, offset] in pvars.items():
if (${name}_isvector) cur_${name}++;
% endfor
}
% endif
% endfor
}
"""
else:
### Allocate more than enough kernels in a matrix
gsize = (int(np.max(base.shape0s)), int(N))
text = """
////////// MAIN FUNCTION //////////
__kernel void fn(
% for name, [type, offset] in ivars.items():
__global const int *${name}_starts,
__global const ${type} *in_${name},
% endfor
% for name, [type, offset] in ovars.items():
__global const int *${name}_starts,
__global ${type} *in_${name},
% endfor
% for name, [type, offset] in pvars.items():
__global const int *${name}_starts,
__global const int *${name}_shape0s,
__global const ${type} *in_${name},
% endfor
__global const int *lengths
)
{
const int m = get_global_id(0);
const int n = get_global_id(1);
const int M = lengths[n];
if (m >= M) return;
% for name, [type, offset] in ivars.items():
${type} ${name} = in_${name}[${offset} + m];
% endfor
% for name, [type, offset] in ovars.items():
${type} ${name};
% endfor
% for name, [type, offset] in pvars.items():
const ${type} ${name} = (${name}_shape0s[n] > 1) ?
in_${name}[${offset} + m] : in_${name}[${offset}];
% endfor
% for name, [type, value] in static_params.items():
const ${type} ${name} = ${value};
% endfor
//////////////////////////////////////////////////
//vvvvv USER DECLARATIONS BELOW vvvvv
${declares}
//^^^^^ USER DECLARATIONS ABOVE ^^^^^
//////////////////////////////////////////////////
/////vvvvv USER COMPUTATIONS BELOW vvvvv
${core_text}
/////^^^^^ USER COMPUTATIONS ABOVE ^^^^^
% for name, [type, offset] in ovars.items():
in_${name}[${offset} + m] = ${name};
% endfor
}
"""
text = Template(text, output_encoding="ascii").render(**textconf)
if 0:
for i, line in enumerate(text.split("\n")):
print "%3d %s" % (i + 1, line)
full_args = []
for name, v in inputs.items() + outputs.items():
full_args.extend([v.cl_starts, v.cl_buf])
for name, v in params.items():
full_args.extend([v.cl_starts, v.cl_shape0s, v.cl_buf])
full_args.append(base.cl_shape0s)
full_args = tuple(full_args)
_fn = cl.Program(queue.context, text).build().fn
_fn.set_args(*[arr.data for arr in full_args])
rval = Plan(queue, _fn, gsize, lsize=None, name=name, tag=tag)
rval.full_args = full_args # prevent garbage-collection
return rval
示例3: plan_probes
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
def plan_probes(queue, periods, X, Y, tag=None):
    """Build a Plan that periodically copies each signal in X into Y.

    Three per-probe int32 device arrays are created: a countdown (initially
    zero), a buffer position (initially zero) and the probe period. On each
    kernel run, for probe ``n``:

    * if its countdown is 0, the ``X.shape0s[n]`` values of ``X[n]`` are
      copied into ``Y[n]`` at row ``bufpositions[n]``, the countdown is
      reset to ``periods[n] - 1`` and the buffer position is incremented;
    * otherwise the countdown is decremented by one.

    Parameters
    ----------
    queue
        Object whose ``context`` and ``device.max_work_group_size`` are
        used to build and size the kernel; also forwarded to ``Plan``.
    periods : sequence of ints
        The period (in time-steps) of each probe; one entry per item of X.
    X
        Probed signals. Each item must have ``shape1s == 1`` and
        ``stride0s == 1``.
    Y
        Output buffers. ``Y.shape1s[i]`` must equal ``X.shape0s[i]`` and
        ``Y.stride1s[i]`` must be 1; X and Y must share the same
        ``cl_buf.ocldtype``.
    tag
        Forwarded unchanged to ``Plan``.

    Returns
    -------
    Plan
        Named ``"cl_probes"``. Besides ``full_args`` (references that keep
        the kernel arguments alive) it carries ``cl_bufpositions`` and
        ``Y``.
    """
    assert len(X) == len(Y)
    assert len(X) == len(periods)
    N = len(X)

    cl_countdowns = to_device(queue, np.zeros(N, dtype="int32"))
    cl_bufpositions = to_device(queue, np.zeros(N, dtype="int32"))
    cl_periods = to_device(queue, np.asarray(periods, dtype="int32"))

    assert X.cl_buf.ocldtype == Y.cl_buf.ocldtype

    # Layout required by the kernel: X[i] is a contiguous column of
    # X.shape0s[i] values, and each row of Y[i] holds that many contiguous
    # values (the kernel writes a sample at offset bufpos * n_dims).
    # `range` is used rather than `xrange` so this also runs on Python 3.
    for i in range(N):
        assert X.shape0s[i] == Y.shape1s[i]
        assert X.shape1s[i] == 1
        assert X.stride0s[i] == 1
        assert Y.stride1s[i] == 1

    text = """
////////// MAIN FUNCTION //////////
__kernel void fn(
__global int *countdowns,
__global int *bufpositions,
__global const int *periods,
__global const int *Xstarts,
__global const int *Xshape0s,
__global const ${Xtype} *Xdata,
__global const int *Ystarts,
__global ${Ytype} *Ydata
)
{
const int n = get_global_id(1);
const int countdown = countdowns[n];
if (countdown == 0) {
const int n_dims = Xshape0s[n];
__global const ${Xtype} *x = Xdata + Xstarts[n];
const int bufpos = bufpositions[n];
__global ${Ytype} *y = Ydata + Ystarts[n] + bufpos * n_dims;
for (int ii = get_global_id(0);
ii < n_dims;
ii += get_global_size(0))
{
y[ii] = x[ii];
}
// This should *not* cause deadlock because
// all local threads guaranteed to be
// in this branch together.
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(0) == 0)
{
countdowns[n] = periods[n] - 1;
bufpositions[n] = bufpos + 1;
}
}
else
{
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(0) == 0)
{
countdowns[n] = countdown - 1;
}
}
}
"""

    textconf = dict(N=N, Xtype=X.cl_buf.ocldtype, Ytype=Y.cl_buf.ocldtype)
    text = Template(text, output_encoding="ascii").render(**textconf)

    # Order must match the kernel signature above.
    full_args = (
        cl_countdowns,
        cl_bufpositions,
        cl_periods,
        X.cl_starts,
        X.cl_shape0s,
        X.cl_buf,
        Y.cl_starts,
        Y.cl_buf,
    )
    _fn = cl.Program(queue.context, text).build().fn
    _fn.set_args(*[arr.data for arr in full_args])

    # Global and local size are equal along dimension 0 and the local size
    # along dimension 1 is 1, so each work-group handles exactly one probe.
    # The kernel's barrier comment relies on this: every work-item in a
    # group reads the same countdown and takes the same branch.
    max_len = min(queue.device.max_work_group_size, max(X.shape0s))
    gsize = (max_len, N)
    lsize = (max_len, 1)

    rval = Plan(queue, _fn, gsize, lsize=lsize, name="cl_probes", tag=tag)
    rval.full_args = full_args  # prevent garbage-collection
    rval.cl_bufpositions = cl_bufpositions
    rval.Y = Y
    return rval
示例4: many_dots_impl
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
#.........這裏部分代碼省略.........
const __global ${cl_beta.ocldtype} * betas,
% endif
const __global ${Y_in.cl_buf.ocldtype} *Y_in_data,
__global ${Y.cl_buf.ocldtype} *Y_data)
{
__local int lstructure[${n_structure_vars}];
__local ${Y.cl_buf.ocldtype} y_sum_pre[${segment_size}];
__local ${Y.cl_buf.ocldtype} \
y_sum_post[${dot_block_size}][${segment_size}];
const int local_idx = get_local_id(0) \
+ get_local_id(1) * get_local_size(0);
int segment_idx = get_local_id(0);
int dot_block_idx = get_local_id(1);
for (int ii = local_idx; ii < ${n_structure_vars}; ii += ${n_locals})
{
lstructure[ii] = gstructure[
get_global_id(2) * ${structure_vars_stride} + ii];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_global_id(0) < ${y_len})
{
if (dot_block_idx == 0)
{
% if float_beta is not None and float_beta != 0 :
y_sum_pre[segment_idx]
= ${float_beta} * Y_in_data[${y_in_starts} + get_global_id(0)];
% elif cl_beta is not None:
y_sum_pre[segment_idx]
= betas[${bb}] * Y_in_data[${y_in_starts} + get_global_id(0)];
% else :
y_sum_pre[segment_idx] = 0;
% endif
% if float_gamma is not None:
% if float_gamma != 0:
y_sum_pre[segment_idx] += ${float_gamma};
% endif
% endif
}
//printf("betaY + gamma=%f\\n", y_sum_pre[segment_idx]);
// XXX Move X into shared memory first
y_sum_post[dot_block_idx][segment_idx] = 0;
for (int ii = dot_block_idx;
ii < ${n_dot_products};
ii += ${dot_block_size})
{
for (int nn = 0; nn < ${N_i}; nn += 1)
{
y_sum_post[dot_block_idx][segment_idx]
+= A_data[${a_starts} + get_global_id(0) * ${a_s0} + nn]
* X_data[${x_starts} + nn];
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
//printf("AX=%f\\n", y_sum_post[dot_block_idx][segment_idx]);
if ((get_global_id(0) < ${y_len}) && (dot_block_idx == 0))
{
for (int ii = 1; ii < ${dot_block_size}; ++ii)
{
y_sum_post[0][segment_idx] += y_sum_post[ii][segment_idx];
}
Y_data[${y_offset} + get_global_id(0)]
= y_sum_pre[segment_idx]
+ ${float_alpha} * y_sum_post[0][segment_idx];
//printf("Yout=%f\\n", Y_data[${y_offset} + get_global_id(0)]);
}
}
"""
text = Template(text, output_encoding='ascii').render(**textconf)
fn = cl.Program(p.queue.context, text).build().fn
full_args = [
cl_gstructure,
p.A.cl_buf,
p.X.cl_buf,
]
if p.cl_beta is not None:
full_args += [p.cl_beta]
full_args += [
p.Y_in.cl_buf,
p.Y.cl_buf,
]
fn.set_args(*[arr.data for arr in full_args])
rval = Plan(p.queue, fn, gsize, lsize,
name='clra_gemv.many_dots_impl',
tag=p.tag,
bw_per_call=bw_from_geometry(p.geometry, items),
flops_per_call=flops_from_geometry(p.geometry, items),
)
rval.full_args = full_args # prevent GC the args
return rval
示例5: reduce_impl
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
#.........這裏部分代碼省略.........
% else :
y_sum_pre[get_local_id(1)] = 0;
% endif
% if float_gamma is not None and float_gamma != 0:
y_sum_pre[get_local_id(1)] += ${float_gamma};
% endif
// printf("betaY + gamma=%f\\n", y_sum_pre[get_local_id(1)]);
}
partialDotProduct[get_local_id(1)][get_local_id(0)] = 0;
% if max_n_dots > 1:
for (int ii = 0;
ii < ${n_dot_products};
ii += 1)
{
% else:
const int ii = 0;
% endif
for (int nn = get_local_id(0);
nn < ${N_cutoff};
nn += get_local_size(0))
{
// segment_size = ${segment_size}
% if (segment_size == 1):
if ((nn < ${N_i}) && (get_global_id(1) < ${y_len}))
{
partialDotProduct[get_local_id(1)][get_local_id(0)] +=
A_data[${a_starts} + get_global_id(1) * ${a_s0} + nn]
* X_data[${x_starts} + nn];
}
% else:
barrier(CLK_LOCAL_MEM_FENCE);
if ((get_local_id(1) == 0) && (nn < ${N_i}))
{
lX[get_local_id(0)] = X_data[${x_starts} + nn];
}
barrier(CLK_LOCAL_MEM_FENCE);
if ((nn < ${N_i}) && (get_global_id(1) < ${y_len}))
{
partialDotProduct[get_local_id(1)][get_local_id(0)] +=
A_data[${a_starts} + get_global_id(1) * ${a_s0} + nn]
* lX[get_local_id(0)];
}
% endif
}
% if (max_n_dots > 1):
}
% endif
// -- Parallel reduction long work-group dimension 0
for (uint stride = 1;
stride < get_local_size(0);
stride *= 2)
{
barrier(CLK_LOCAL_MEM_FENCE);
uint index = 2 * stride * get_local_id(0);
if (index + stride < get_local_size(0))
{
partialDotProduct[get_local_id(1)][index] +=
partialDotProduct[get_local_id(1)][index + stride];
}
}
// barrier(CLK_LOCAL_MEM_FENCE);
if ((get_local_id(0) == 0) && (get_global_id(1) < ${y_len})) {
Y_data[${y_offset} + get_global_id(1)] = y_sum_pre[get_local_id(1)]
+ ${float_alpha} * partialDotProduct[get_local_id(1)][0];
}
}
"""
text = Template(text, output_encoding='ascii').render(**textconf)
fn = cl.Program(p.queue.context, text).build().fn
full_args = [
cl_gstructure,
p.A.cl_buf,
p.X.cl_buf,
]
if p.cl_beta is not None:
full_args += [p.cl_beta]
full_args += [
p.Y_in.cl_buf,
p.Y.cl_buf,
]
fn.set_args(*[arr.data for arr in full_args])
rval = Plan(p.queue, fn, gsize, lsize,
name='clra_gemv.reduce_impl',
tag=p.tag,
bw_per_call=bw_from_geometry(p.geometry, items),
flops_per_call=flops_from_geometry(p.geometry, items),
)
rval.full_args = full_args # prevent GC the args
return rval
示例6: ref_impl
# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
#.........這裏部分代碼省略.........
const ${cl_beta.ocldtype} beta = betas[bb];
% elif clra_beta is not None:
const int beta_offset = beta_starts[bb];
const ${clra_beta.cl_buf.ocldtype} beta
= beta_data[beta_offset + mm];
% endif
% if float_gamma is not None:
const ${Y.cl_buf.ocldtype} gamma = ${float_gamma};
% elif cl_gamma is not None:
const ${cl_gamma.ocldtype} gamma = gammas[bb];
% endif
Y_data[y_offset + mm] = gamma + beta * Y_in_data[y_in_offset + mm];
% if (A_js is not None) :
const int n_dot_products = A_js_shape0s[bb];
X_js_data += X_js_starts[bb];
A_js_data += A_js_starts[bb];
${Y.cl_buf.ocldtype} y_sum = 0;
for (int ii = 0; ii < n_dot_products; ++ii)
{
const int x_ji = X_js_data[ii];
const int a_ji = A_js_data[ii];
const int N_i = A_shape1s[a_ji];
const int x_offset = X_starts[x_ji];
const int a_offset = A_starts[a_ji];
const int AsM = A_stride0s[a_ji];
const int XsM = X_stride0s[x_ji];
for (int nn = 0; nn < N_i; ++nn)
{
y_sum += X_data[x_offset + nn * XsM]
* A_data[a_offset + mm * AsM + nn];
}
}
% if float_alpha is not None:
Y_data[y_offset + mm] += ${float_alpha} * y_sum;
% elif cl_alpha is not None:
Y_data[y_offset + mm] += alphas[bb] * y_sum;
% endif
% endif
}
}
"""
text = Template(text, output_encoding='ascii').render(**p.__dict__)
#print text
gsize = (
max(p.geometry[ii]['y_len'] for ii in items),
len(items))
lsize = None
fn = cl.Program(p.queue.context, text).build().fn
full_args = [cl_items]
if p.cl_alpha is not None:
full_args += [p.cl_alpha]
if p.A_js is not None:
full_args += [
p.A.cl_starts,
p.A.cl_shape1s,
p.A.cl_stride0s,
p.A.cl_buf,
p.A_js.cl_starts,
p.A_js.cl_shape0s,
p.A_js.cl_buf,
p.X.cl_starts,
p.X.cl_stride0s,
p.X.cl_buf,
p.X_js.cl_starts,
p.X_js.cl_buf,
]
if p.cl_beta is not None:
full_args += [p.cl_beta]
elif p.clra_beta is not None:
full_args += [p.clra_beta.cl_starts, p.clra_beta.cl_buf]
if p.cl_gamma is not None:
full_args += [p.cl_gamma]
elif p.clra_gamma is not None:
full_args += [p.clra_gamma.cl_starts, p.clra_gamma.cl_buf]
full_args += [
p.Y_in.cl_starts,
p.Y_in.cl_buf,
p.Y.cl_starts,
p.Y.cl_shape0s,
p.Y.cl_buf]
#print [str(arr.dtype)[0] for arr in full_args]
fn.set_args(*[arr.data for arr in full_args])
rval = Plan(p.queue, fn, gsize, lsize, name="clra_gemv.ref_impl",
tag=p.tag,
bw_per_call=bw_from_geometry(p.geometry, items),
flops_per_call=flops_from_geometry(p.geometry, items))
rval.full_args = full_args # prevent GC the args
return rval