本文整理汇总了Python中plan.Plan.Ns方法的典型用法代码示例。如果您正苦于以下问题:Python Plan.Ns方法的具体用法?Python Plan.Ns怎么用?Python Plan.Ns使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类plan.Plan
的用法示例。
在下文中一共展示了Plan.Ns方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: plan_parallel_ragged_gather_gemv2
# 需要导入模块: from plan import Plan [as 别名]
# 或者: from plan.Plan import Ns [as 别名]
def plan_parallel_ragged_gather_gemv2(queue, Ms, Ns, alpha, A, A_js, X, X_js,
                                      beta, Y, group_size = 32, Y_in=None, tag=None):
    """Build an OpenCL kernel for a work-group-parallel ragged gather-GEMV.

    For each output row ``yi`` the kernel computes
    ``Y[yi] = beta[yi] * Y_in[yi] + (reduction of dot(A[a_ji], X[x_ji]))``
    over the index lists ``A_js[yi]`` / ``X_js[yi]``, using one work-group of
    ``group_size`` work-items per output and a local-memory tree reduction of
    the partial dot products.

    NOTE(review): this listing is truncated by the source site (the
    "部分代码省略" marker below) — the end of the kernel string, the final
    scaling by alpha, and the Plan construction/return are not visible here,
    so behavior past the start of the reduction loop is undocumented.

    Parameters (as far as the visible code shows):
      queue      -- OpenCL command queue; also supplies the context.
      Ms, Ns     -- ragged row/column lengths; only Ns is used in the visible code.
      alpha, beta-- scalar or per-row coefficients (scalars are broadcast below).
      A, X, Y    -- ragged device arrays with .cl_buf/.buf and .ocldtype attributes.
      A_js, X_js -- per-output index lists selecting which A rows dot which X rows.
      group_size -- work-items per work-group (local reduction width).
      Y_in       -- optional separate input Y; defaults to Y (in-place update).
    """
    # TODO: if alpha or beta is a float
    # then render it into the kernel text.
    # Broadcast a scalar alpha to one coefficient per output row; a sequence
    # raises TypeError on float() and is passed through unchanged.
    try:
        float(alpha)
        alpha = [alpha] * len(Y)
    except TypeError:
        pass
    # Same broadcast for beta.
    try:
        float(beta)
        beta = [beta] * len(Y)
    except TypeError:
        pass
    # Device copies of the per-row coefficients, stored in Y's dtype.
    cl_alpha = to_device(queue, np.asarray(alpha, Y.buf.dtype))
    cl_beta = to_device(queue, np.asarray(beta, Y.buf.dtype))
    if Y_in is None:
        Y_in = Y
    # XXX check for e.g. all Ns being the same thing
    # especially all Ns == 1
    cl_Ns = to_device(queue, np.asarray(Ns, 'int32'))
    # XXX check that all the ints are ints not longs
    # Values substituted into the Mako kernel template below.
    textconf = {
        'type_alpha': cl_alpha.ocldtype,
        'type_beta': cl_beta.ocldtype,
        'type_A': A.cl_buf.ocldtype,
        'type_X': X.cl_buf.ocldtype,
        'type_Y': Y.cl_buf.ocldtype,
        'y_len': len(Y),
        'lsize': group_size,
    }
    text = """
__kernel void fn(
const __global int *Ns,
const __global ${type_alpha} * alphas,
const __global int *A_starts,
const __global ${type_A} *A_data,
const __global int *A_js_starts,
const __global int *A_js_lens,
const __global int *A_js_data,
const __global int *X_starts,
const __global ${type_X} *X_data,
const __global int *X_js_starts,
const __global int *X_js_data,
const __global ${type_beta} * betas,
const __global int *Y_in_starts,
const __global ${type_Y} *Y_in_data,
const __global int *Y_starts,
const __global int *Y_lens,
__global ${type_Y} *Y_data)
{
//const int mm = get_global_id(1); //TODO
__local ${type_Y} partialDotProduct[${lsize}]; //Scratch space for the dot products
//Y is divided into groups of size group_size. Each work-item does enough dot-products to cover one of the groups
for (uint yi = get_group_id(0); yi < ${y_len}; yi += get_num_groups(0)) {
const __global int* X_js_row = X_js_data + X_js_starts[yi];
const __global int* A_js_row = A_js_data + A_js_starts[yi];
const ${type_alpha} alpha = alphas[yi];
const ${type_beta} beta = betas[yi];
int y_offset = Y_starts[yi];
int y_in_offset = Y_in_starts[yi];
Y_data[y_offset] = beta * Y_in_data[y_in_offset];
float sum = 0;
int n_dot_products = A_js_lens[yi]; //Do all of xjs dot products at same time
for(int j = 0; j < n_dot_products; j++) {
int x_ji = X_js_row[j];
int a_ji = A_js_row[j];
int N_i = Ns[a_ji];
const __global ${type_A}* A_row = A_data + A_starts[a_ji]; //Get the rows for the product
const __global ${type_X}* X_row = X_data + X_starts[x_ji];
//Each work item will do some fraction of the multiplications and store the result locally
for (uint x = get_local_id(0); x < N_i; x += get_local_size(0)) {
sum += A_row[x] * X_row[x];
}
}
partialDotProduct[get_local_id(0)] = sum;
//Parallel reduction of locally stored sums
for (uint stride = 1; stride < get_local_size(0); stride *= 2) {
barrier(CLK_LOCAL_MEM_FENCE);
#.........这里部分代码省略.........
示例2: plan_ragged_gather_gemv
# 需要导入模块: from plan import Plan [as 别名]
# 或者: from plan.Plan import Ns [as 别名]
def plan_ragged_gather_gemv(queue, Ms, Ns, alpha, A, A_js, X, X_js,
                            beta, Y, Y_in=None, tag=None):
    """Build an OpenCL kernel for a ragged gather-GEMV, one work-item per element.

    The kernel launches a 2-D grid: ``bb = get_global_id(1)`` indexes the
    output rows and ``mm = get_global_id(0)`` indexes elements within a row
    (guarded by ``mm < Y_lens[bb]``). Each work-item computes
    ``Y[bb][mm] = beta[bb] * Y_in[bb][mm]
                  + alpha[bb] * sum_j dot(X[X_js[bb][j]], A[A_js[bb][j]][mm, :])``
    with a sequential inner loop over each ragged dot product.

    NOTE(review): this listing is truncated by the source site (the
    "部分代码省略" marker below) — the Plan construction and return statement
    are not visible here.

    Parameters (as far as the visible code shows):
      queue      -- OpenCL command queue; also supplies the context.
      Ms, Ns     -- ragged row/column lengths; Ms sizes the global grid, Ns
                    bounds each dot product.
      alpha, beta-- scalar or per-row coefficients (scalars are broadcast below).
      A, X, Y    -- ragged device arrays with .cl_buf/.buf and .ocldtype attributes.
      A_js, X_js -- per-output index lists selecting which A rows dot which X rows.
      Y_in       -- optional separate input Y; defaults to Y (in-place update).
    """
    # TODO: if alpha or beta is a float
    # then render it into the kernel text.
    # Broadcast a scalar alpha to one coefficient per output row; a sequence
    # raises TypeError on float() and is passed through unchanged.
    try:
        float(alpha)
        alpha = [alpha] * len(Y)
    except TypeError:
        pass
    # Same broadcast for beta.
    try:
        float(beta)
        beta = [beta] * len(Y)
    except TypeError:
        pass
    # Device copies of the per-row coefficients, stored in Y's dtype.
    cl_alpha = to_device(queue, np.asarray(alpha, Y.buf.dtype))
    cl_beta = to_device(queue, np.asarray(beta, Y.buf.dtype))
    if Y_in is None:
        Y_in = Y
    # XXX check for e.g. all Ns being the same thing
    # especially all Ns == 1
    cl_Ns = to_device(queue, np.asarray(Ns, 'int32'))
    # XXX check that all the ints are ints not longs
    # Values substituted into the Mako kernel template below.
    textconf = {
        'type_alpha': cl_alpha.ocldtype,
        'type_beta': cl_beta.ocldtype,
        'type_A': A.cl_buf.ocldtype,
        'type_X': X.cl_buf.ocldtype,
        'type_Y': Y.cl_buf.ocldtype,
    }
    text = """
__kernel void fn(
__global int *Ns,
__global ${type_alpha} * alphas,
__global int *A_starts,
__global ${type_A} *A_data,
__global int *A_js_starts,
__global int *A_js_lens,
__global int *A_js_data,
__global int *X_starts,
__global ${type_X} *X_data,
__global int *X_js_starts,
__global int *X_js_data,
__global ${type_beta} * betas,
__global int *Y_in_starts,
__global ${type_Y} *Y_in_data,
__global int *Y_starts,
__global int *Y_lens,
__global ${type_Y} *Y_data)
{
const int mm = get_global_id(0);
const int bb = get_global_id(1);
const int M = Y_lens[bb];
if (mm < M)
{
const ${type_alpha} alpha = alphas[bb];
const ${type_beta} beta = betas[bb];
int n_dot_products = A_js_lens[bb];
int y_offset = Y_starts[bb];
int y_in_offset = Y_in_starts[bb];
X_js_data += X_js_starts[bb];
A_js_data += A_js_starts[bb];
Y_data[y_offset + mm] = beta * Y_in_data[y_in_offset + mm];
for (int ii = 0; ii < n_dot_products; ++ii)
{
int x_ji = X_js_data[ii];
int a_ji = A_js_data[ii];
int N_i = Ns[a_ji];
int x_offset = X_starts[x_ji];
int a_offset = A_starts[a_ji];
// compute the matrix-vector product
// dot(X[x_ji], A[a_ji])
${type_Y} y_sum = 0;
for (int nn = 0; nn < N_i; ++nn) //Parallel reduction. How big is N_i?
{
y_sum += X_data[x_offset + nn]
* A_data[a_offset + nn * M + mm];
}
Y_data[y_offset + mm] += alpha * y_sum;
}
}
}
"""
    # Render the Mako template, filling in the OpenCL element types.
    text = Template(text, output_encoding='ascii').render(**textconf)
    # Global size: max row length x number of outputs (matches mm/bb above);
    # let the implementation pick the work-group size.
    gsize = (int(max(Ms)), int(len(Y)),)
    lsize = None
    # Compile the kernel and fetch the entry point.
    _fn = cl.Program(queue.context, text).build().fn
#.........这里部分代码省略.........