當前位置: 首頁>>代碼示例>>Python>>正文


Python Plan.full_args方法代碼示例

本文整理匯總了Python中plan.Plan.full_args方法的典型用法代碼示例。如果您正苦於以下問題:Python Plan.full_args方法的具體用法?Python Plan.full_args怎麽用?Python Plan.full_args使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在plan.Plan的用法示例。


在下文中一共展示了Plan.full_args方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: plan_direct

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
def plan_direct(queue, code, init, Xname, X, Y, tag=None):
    from . import ast_conversion

    assert len(X) == len(Y)
    N = len(X)

    text = """
        ////////// MAIN FUNCTION //////////
        __kernel void fn(
            __global const int *${IN}starts,
            __global const ${INtype} *${IN}data,
            __global const int *${OUT}starts,
            __global ${OUTtype} *${OUT}data
        )
        {
            const int n = get_global_id(0);
            if (n >= ${N}) return;

            __global const ${INtype} *${arg} = ${IN}data + ${IN}starts[n];
            __global ${OUTtype} *${OUT} = ${OUT}data + ${OUT}starts[n];

            /////vvvvv USER DECLARATIONS BELOW vvvvv
${init}

            /////vvvvv USER COMPUTATIONS BELOW vvvvv
${code}
            // END OF FUNC: put nothing after user code, since it can return
        }
        """

    textconf = dict(init=_indent(init, 12),
                    code=_indent(code, 12), N=N, arg=Xname,
                    IN=ast_conversion.INPUT_NAME, INtype=X.cl_buf.ocldtype,
                    OUT=ast_conversion.OUTPUT_NAME, OUTtype=Y.cl_buf.ocldtype,
                    )
    text = Template(text, output_encoding='ascii').render(**textconf)

    full_args = (X.cl_starts, X.cl_buf, Y.cl_starts, Y.cl_buf)
    _fn = cl.Program(queue.context, text).build().fn
    _fn.set_args(*[arr.data for arr in full_args])

    gsize = (N,)
    rval = Plan(queue, _fn, gsize, lsize=None, name="cl_direct", tag=tag)
    rval.full_args = full_args     # prevent garbage-collection
    return rval
開發者ID:bptripp,項目名稱:nengo_ocl,代碼行數:47,代碼來源:clra_nonlinearities.py

示例2: _plan_template

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]

#.........這裏部分代碼省略.........
            m++;
            if (m >= lengths[n]) {
                n++;
                m = 0;
                if (n >= ${N}) return;

    % for name, [type, offset] in ivars.items() + ovars.items() + pvars.items():
                cur_${name} = in_${name} + ${offset};
    % endfor
    % for name, [type, offset] in pvars.items():
                ${name}_isvector = ${name}_shape0s[n] > 1;
                if (!${name}_isvector) ${name} = *cur_${name};
    % endfor
            } else {
    % for name, [type, offset] in ivars.items() + ovars.items():
                cur_${name}++;
    % endfor
    % for name, [type, offset] in pvars.items():
                if (${name}_isvector) cur_${name}++;
    % endfor
            }
  % endif
% endfor
        }
        """
    else:
        ### Allocate more than enough kernels in a matrix
        gsize = (int(np.max(base.shape0s)), int(N))
        text = """
        ////////// MAIN FUNCTION //////////
        __kernel void fn(
% for name, [type, offset] in ivars.items():
            __global const int *${name}_starts,
            __global const ${type} *in_${name},
% endfor
% for name, [type, offset] in ovars.items():
            __global const int *${name}_starts,
            __global ${type} *in_${name},
% endfor
% for name, [type, offset] in pvars.items():
            __global const int *${name}_starts,
            __global const int *${name}_shape0s,
            __global const ${type} *in_${name},
% endfor
            __global const int *lengths
        )
        {
            const int m = get_global_id(0);
            const int n = get_global_id(1);
            const int M = lengths[n];
            if (m >= M) return;

% for name, [type, offset] in ivars.items():
            ${type} ${name} = in_${name}[${offset} + m];
% endfor
% for name, [type, offset] in ovars.items():
            ${type} ${name};
% endfor
% for name, [type, offset] in pvars.items():
            const ${type} ${name} = (${name}_shape0s[n] > 1) ?
                in_${name}[${offset} + m] : in_${name}[${offset}];
% endfor
% for name, [type, value] in static_params.items():
            const ${type} ${name} = ${value};
% endfor
            //////////////////////////////////////////////////
            //vvvvv USER DECLARATIONS BELOW vvvvv
            ${declares}
            //^^^^^ USER DECLARATIONS ABOVE ^^^^^
            //////////////////////////////////////////////////

            /////vvvvv USER COMPUTATIONS BELOW vvvvv
            ${core_text}
            /////^^^^^ USER COMPUTATIONS ABOVE ^^^^^

% for name, [type, offset] in ovars.items():
            in_${name}[${offset} + m] = ${name};
% endfor
        }
        """

    text = Template(text, output_encoding="ascii").render(**textconf)
    if 0:
        for i, line in enumerate(text.split("\n")):
            print "%3d %s" % (i + 1, line)

    full_args = []
    for name, v in inputs.items() + outputs.items():
        full_args.extend([v.cl_starts, v.cl_buf])
    for name, v in params.items():
        full_args.extend([v.cl_starts, v.cl_shape0s, v.cl_buf])
    full_args.append(base.cl_shape0s)
    full_args = tuple(full_args)

    _fn = cl.Program(queue.context, text).build().fn
    _fn.set_args(*[arr.data for arr in full_args])

    rval = Plan(queue, _fn, gsize, lsize=None, name=name, tag=tag)
    rval.full_args = full_args  # prevent garbage-collection
    return rval
開發者ID:jaberg,項目名稱:nengo_ocl-main,代碼行數:104,代碼來源:clra_nonlinearities.py

示例3: plan_probes

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]
def plan_probes(queue, periods, X, Y, tag=None):
    """
    Parameters
    ----------
    P : raggedarray of ints
        The period (in time-steps) of each probe
    """

    assert len(X) == len(Y)
    assert len(X) == len(periods)
    N = len(X)

    cl_countdowns = to_device(queue, np.zeros(N, dtype="int32"))
    cl_bufpositions = to_device(queue, np.zeros(N, dtype="int32"))
    cl_periods = to_device(queue, np.asarray(periods, dtype="int32"))

    assert X.cl_buf.ocldtype == Y.cl_buf.ocldtype

    ### N.B.  X[i].shape = (ndims[i], )
    ###       Y[i].shape = (buf_ndims[i], buf_len)

    for i in xrange(N):
        assert X.shape0s[i] == Y.shape1s[i]
        assert X.shape1s[i] == 1
        assert X.stride0s[i] == 1
        assert Y.stride1s[i] == 1

    text = """
        ////////// MAIN FUNCTION //////////
        __kernel void fn(
            __global int *countdowns,
            __global int *bufpositions,
            __global const int *periods,
            __global const int *Xstarts,
            __global const int *Xshape0s,
            __global const ${Xtype} *Xdata,
            __global const int *Ystarts,
            __global ${Ytype} *Ydata
        )
        {
            const int n = get_global_id(1);
            const int countdown = countdowns[n];

            if (countdown == 0) {
                const int n_dims = Xshape0s[n];
                __global const ${Xtype} *x = Xdata + Xstarts[n];
                const int bufpos = bufpositions[n];

                __global ${Ytype} *y = Ydata + Ystarts[n] + bufpos * n_dims;

                for (int ii = get_global_id(0);
                         ii < n_dims;
                         ii += get_global_size(0))
                {
                    y[ii] = x[ii];
                }
                // This should *not* cause deadlock because
                // all local threads guaranteed to be
                // in this branch together.
                barrier(CLK_LOCAL_MEM_FENCE);
                if (get_global_id(0) == 0)
                {
                    countdowns[n] = periods[n] - 1;
                    bufpositions[n] = bufpos + 1;
                }
            }
            else
            {
                barrier(CLK_LOCAL_MEM_FENCE);
                if (get_global_id(0) == 0)
                {
                    countdowns[n] = countdown - 1;
                }
            }
        }
        """

    textconf = dict(N=N, Xtype=X.cl_buf.ocldtype, Ytype=Y.cl_buf.ocldtype)
    text = Template(text, output_encoding="ascii").render(**textconf)

    full_args = (cl_countdowns, cl_bufpositions, cl_periods, X.cl_starts, X.cl_shape0s, X.cl_buf, Y.cl_starts, Y.cl_buf)
    _fn = cl.Program(queue.context, text).build().fn
    _fn.set_args(*[arr.data for arr in full_args])

    max_len = min(queue.device.max_work_group_size, max(X.shape0s))
    gsize = (max_len, N)
    lsize = (max_len, 1)
    rval = Plan(queue, _fn, gsize, lsize=lsize, name="cl_probes", tag=tag)
    rval.full_args = full_args  # prevent garbage-collection
    rval.cl_bufpositions = cl_bufpositions
    rval.Y = Y
    return rval
開發者ID:jaberg,項目名稱:nengo_ocl-main,代碼行數:94,代碼來源:clra_nonlinearities.py

示例4: many_dots_impl

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]

#.........這裏部分代碼省略.........
            const __global ${cl_beta.ocldtype} * betas,
            % endif
            const __global ${Y_in.cl_buf.ocldtype} *Y_in_data,
            __global ${Y.cl_buf.ocldtype} *Y_data)
    {
        __local int lstructure[${n_structure_vars}];
        __local ${Y.cl_buf.ocldtype} y_sum_pre[${segment_size}];
        __local ${Y.cl_buf.ocldtype} \
            y_sum_post[${dot_block_size}][${segment_size}];
        const int local_idx = get_local_id(0) \
            + get_local_id(1) * get_local_size(0);

        int segment_idx = get_local_id(0);
        int dot_block_idx = get_local_id(1);

        for (int ii = local_idx; ii < ${n_structure_vars}; ii += ${n_locals})
        {
            lstructure[ii] = gstructure[
                get_global_id(2) * ${structure_vars_stride} + ii];
        }
        barrier(CLK_LOCAL_MEM_FENCE);

        if (get_global_id(0) < ${y_len})
        {

            if (dot_block_idx == 0)
            {
    % if float_beta is not None and float_beta != 0 :
                y_sum_pre[segment_idx]
                = ${float_beta} * Y_in_data[${y_in_starts} + get_global_id(0)];
    % elif cl_beta is not None:
                y_sum_pre[segment_idx]
                = betas[${bb}] * Y_in_data[${y_in_starts} + get_global_id(0)];
    % else :
                y_sum_pre[segment_idx] = 0;
    % endif

    % if float_gamma is not None:
        % if float_gamma != 0:
                y_sum_pre[segment_idx] += ${float_gamma};
        % endif
    % endif
            }
        //printf("betaY + gamma=%f\\n", y_sum_pre[segment_idx]);

            // XXX Move X into shared memory first
            y_sum_post[dot_block_idx][segment_idx] = 0;
            for (int ii = dot_block_idx;
                     ii < ${n_dot_products};
                     ii += ${dot_block_size})
            {
                for (int nn = 0; nn < ${N_i}; nn += 1)
                {
                    y_sum_post[dot_block_idx][segment_idx]
                    += A_data[${a_starts} + get_global_id(0) * ${a_s0} + nn]
                       * X_data[${x_starts} + nn];
                }
            }
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        //printf("AX=%f\\n", y_sum_post[dot_block_idx][segment_idx]);
        if ((get_global_id(0) < ${y_len}) && (dot_block_idx == 0))
        {
            for (int ii = 1; ii < ${dot_block_size}; ++ii)
            {
                y_sum_post[0][segment_idx] += y_sum_post[ii][segment_idx];
            }
            Y_data[${y_offset} + get_global_id(0)]
                = y_sum_pre[segment_idx]
                  + ${float_alpha} * y_sum_post[0][segment_idx];
        //printf("Yout=%f\\n", Y_data[${y_offset} + get_global_id(0)]);
        }
    }
        """

    text = Template(text, output_encoding='ascii').render(**textconf)

    fn = cl.Program(p.queue.context, text).build().fn

    full_args = [
                 cl_gstructure,
                 p.A.cl_buf,
                 p.X.cl_buf,
                 ]
    if p.cl_beta is not None:
        full_args += [p.cl_beta]
    full_args += [
                 p.Y_in.cl_buf,
                 p.Y.cl_buf,
                ]

    fn.set_args(*[arr.data for arr in full_args])
    rval = Plan(p.queue, fn, gsize, lsize,
        name='clra_gemv.many_dots_impl',
        tag=p.tag,
        bw_per_call=bw_from_geometry(p.geometry, items),
        flops_per_call=flops_from_geometry(p.geometry, items),
        )
    rval.full_args = full_args  # prevent GC the args
    return rval
開發者ID:jaberg,項目名稱:nengo_ocl-main,代碼行數:104,代碼來源:clra_gemv.py

示例5: reduce_impl

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]

#.........這裏部分代碼省略.........
    % else :
            y_sum_pre[get_local_id(1)] = 0;
    % endif

    % if float_gamma is not None and float_gamma != 0:
            y_sum_pre[get_local_id(1)] += ${float_gamma};
    % endif
    // printf("betaY + gamma=%f\\n", y_sum_pre[get_local_id(1)]);
        }

        partialDotProduct[get_local_id(1)][get_local_id(0)] = 0;
    % if max_n_dots > 1:
        for (int ii = 0;
                 ii < ${n_dot_products};
                 ii += 1)
        {
    % else:
        const int ii = 0;
    % endif


        for (int nn = get_local_id(0);
                 nn < ${N_cutoff};
                 nn += get_local_size(0))
        {
    // segment_size = ${segment_size}
    % if (segment_size == 1):
            if ((nn < ${N_i}) && (get_global_id(1) < ${y_len}))
            {
            partialDotProduct[get_local_id(1)][get_local_id(0)] +=
                A_data[${a_starts} + get_global_id(1) * ${a_s0} + nn]
                * X_data[${x_starts} + nn];
            }
    % else:
            barrier(CLK_LOCAL_MEM_FENCE);
            if ((get_local_id(1) == 0) && (nn < ${N_i}))
            {
                lX[get_local_id(0)] = X_data[${x_starts} + nn];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            if ((nn < ${N_i}) && (get_global_id(1) < ${y_len}))
            {
            partialDotProduct[get_local_id(1)][get_local_id(0)] +=
                A_data[${a_starts} + get_global_id(1) * ${a_s0} + nn]
                * lX[get_local_id(0)];
            }
    % endif
        }

    % if (max_n_dots > 1):
        }
    % endif

        // -- Parallel reduction long work-group dimension 0
        for (uint stride = 1;
                  stride < get_local_size(0);
                  stride *= 2)
        {
            barrier(CLK_LOCAL_MEM_FENCE);

            uint index = 2 * stride * get_local_id(0);
            if (index + stride < get_local_size(0))
            {
                partialDotProduct[get_local_id(1)][index] +=
                    partialDotProduct[get_local_id(1)][index + stride];
            }
        }
        // barrier(CLK_LOCAL_MEM_FENCE);
        if ((get_local_id(0) == 0) && (get_global_id(1) < ${y_len})) {
            Y_data[${y_offset} + get_global_id(1)] = y_sum_pre[get_local_id(1)]
                + ${float_alpha} * partialDotProduct[get_local_id(1)][0];
        }
    }
        """

    text = Template(text, output_encoding='ascii').render(**textconf)

    fn = cl.Program(p.queue.context, text).build().fn

    full_args = [
                 cl_gstructure,
                 p.A.cl_buf,
                 p.X.cl_buf,
                 ]
    if p.cl_beta is not None:
        full_args += [p.cl_beta]
    full_args += [
                 p.Y_in.cl_buf,
                 p.Y.cl_buf,
                ]

    fn.set_args(*[arr.data for arr in full_args])
    rval = Plan(p.queue, fn, gsize, lsize,
        name='clra_gemv.reduce_impl',
        tag=p.tag,
        bw_per_call=bw_from_geometry(p.geometry, items),
        flops_per_call=flops_from_geometry(p.geometry, items),
        )
    rval.full_args = full_args  # prevent GC the args
    return rval
開發者ID:jaberg,項目名稱:nengo_ocl-main,代碼行數:104,代碼來源:clra_gemv.py

示例6: ref_impl

# 需要導入模塊: from plan import Plan [as 別名]
# 或者: from plan.Plan import full_args [as 別名]

#.........這裏部分代碼省略.........
                const ${cl_beta.ocldtype} beta = betas[bb];
    % elif clra_beta is not None:
                const int beta_offset = beta_starts[bb];
                const ${clra_beta.cl_buf.ocldtype} beta
                    = beta_data[beta_offset + mm];
    % endif

    % if float_gamma is not None:
                const ${Y.cl_buf.ocldtype} gamma = ${float_gamma};
    % elif cl_gamma is not None:
                const ${cl_gamma.ocldtype} gamma = gammas[bb];
    % endif

                Y_data[y_offset + mm] = gamma + beta * Y_in_data[y_in_offset + mm];

    % if (A_js is not None) :

                const int n_dot_products = A_js_shape0s[bb];
                X_js_data += X_js_starts[bb];
                A_js_data += A_js_starts[bb];

                ${Y.cl_buf.ocldtype} y_sum = 0;
                for (int ii = 0; ii < n_dot_products; ++ii)
                {
                    const int x_ji = X_js_data[ii];
                    const int a_ji = A_js_data[ii];
                    const int N_i = A_shape1s[a_ji];
                    const int x_offset = X_starts[x_ji];
                    const int a_offset = A_starts[a_ji];
                    const int AsM = A_stride0s[a_ji];
                    const int XsM = X_stride0s[x_ji];

                    for (int nn = 0; nn < N_i; ++nn)
                    {
                        y_sum += X_data[x_offset + nn * XsM]
                                 * A_data[a_offset + mm * AsM + nn];
                    }
                }
        % if float_alpha is not None:
                Y_data[y_offset + mm] += ${float_alpha} * y_sum;
        % elif cl_alpha is not None:
                Y_data[y_offset + mm] += alphas[bb] * y_sum;
        % endif
    % endif
            }

        }
    """

    text = Template(text, output_encoding='ascii').render(**p.__dict__)
    #print text

    gsize = (
        max(p.geometry[ii]['y_len'] for ii in items),
        len(items))
    lsize = None
    fn = cl.Program(p.queue.context, text).build().fn
    full_args = [cl_items]
    if p.cl_alpha is not None:
        full_args += [p.cl_alpha]
    if p.A_js is not None:
        full_args += [
            p.A.cl_starts,
            p.A.cl_shape1s,
            p.A.cl_stride0s,
            p.A.cl_buf,
            p.A_js.cl_starts,
            p.A_js.cl_shape0s,
            p.A_js.cl_buf,
            p.X.cl_starts,
            p.X.cl_stride0s,
            p.X.cl_buf,
            p.X_js.cl_starts,
            p.X_js.cl_buf,
            ]
    if p.cl_beta is not None:
        full_args += [p.cl_beta]
    elif p.clra_beta is not None:
        full_args += [p.clra_beta.cl_starts, p.clra_beta.cl_buf]

    if p.cl_gamma is not None:
        full_args += [p.cl_gamma]
    elif p.clra_gamma is not None:
        full_args += [p.clra_gamma.cl_starts, p.clra_gamma.cl_buf]

    full_args += [
        p.Y_in.cl_starts,
        p.Y_in.cl_buf,
        p.Y.cl_starts,
        p.Y.cl_shape0s,
        p.Y.cl_buf]

    #print [str(arr.dtype)[0] for arr in full_args]
    fn.set_args(*[arr.data for arr in full_args])
    rval = Plan(p.queue, fn, gsize, lsize, name="clra_gemv.ref_impl",
        tag=p.tag,
        bw_per_call=bw_from_geometry(p.geometry, items),
        flops_per_call=flops_from_geometry(p.geometry, items))
    rval.full_args = full_args  # prevent GC the args
    return rval
開發者ID:jaberg,項目名稱:nengo_ocl-main,代碼行數:104,代碼來源:clra_gemv.py


注:本文中的plan.Plan.full_args方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。