Python strutil.render_string函数代码示例

本文整理汇总了Python中theano.misc.strutil.render_string函数的典型用法代码示例。如果您正苦于以下问题：Python render_string函数的具体用法？Python render_string怎么用？Python render_string使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了render_string函数的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: c_code


#.........这里部分代码省略.........
            }

{ // extra scope so fail works
            //Read and check stride arguments
            const int dr = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,0);
            const int dc = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,1);
            const int dt = *(dtype_%(d)s*)PyArray_GETPTR1(%(d)s,2);
            if (dr <= 0 || dc <= 0 || dt <= 0)
            {
                PyErr_Format(PyExc_ValueError,"ConvGrad3D: Strides should all be positive but they are %%i, %%i, %%i",dr,dc,dt);
                %(fail)s
            }

{ // extra scope so fail works
            //Compute correct sized of output
            const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
            const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
            const int outputDur = int( (vidDur - filterDur) / dt ) +1;



            if (PyArray_DIMS(%(dCdH)s)[0] != batchSize ||
                PyArray_DIMS(%(dCdH)s)[4] != outputChannels ||
                PyArray_DIMS(%(dCdH)s)[1] != outputHeight ||
                PyArray_DIMS(%(dCdH)s)[2] != outputWidth ||
                PyArray_DIMS(%(dCdH)s)[3] != outputDur)
            {
                PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%li,%%li,%%li,%%li,%%li)", batchSize,  outputHeight, outputWidth, outputDur, outputChannels, (long)PyArray_DIMS(%(dCdH)s)[0], (long)PyArray_DIMS(%(dCdH)s)[1], (long)PyArray_DIMS(%(dCdH)s)[2], (long)PyArray_DIMS(%(dCdH)s)[3], (long)PyArray_DIMS(%(dCdH)s)[4]);
                %(fail)s
            }
{ // extra scope for fail

            npy_intp dims[5];
            dims[0] = outputChannels;
            dims[4] = inputChannels;
            dims[1] = filterHeight;
            dims[2] = filterWidth;
            dims[3] = filterDur;

            if(!(%(dCdW)s)  || PyArray_DIMS(%(dCdW)s)[0]!=dims[0] ||
                  PyArray_DIMS(%(dCdW)s)[1]!=dims[1] ||
                  PyArray_DIMS(%(dCdW)s)[2]!=dims[2] ||
                  PyArray_DIMS(%(dCdW)s)[3]!=dims[3] ||
                  PyArray_DIMS(%(dCdW)s)[4]!=dims[4] ){
               Py_XDECREF(%(dCdW)s);
               %(dCdW)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(V)s)->type_num);

               if (!(%(dCdW)s)) {
                  PyErr_Format(PyExc_MemoryError,"ConvGrad3D: Could not allocate dCdW");
                %(fail)s
               }
            }
{ //extra scope so fail works

            #define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )

            #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )

            const int dhs3 = PyArray_STRIDES(%(dCdH)s)[3];
            const int dtvs3 = dt * PyArray_STRIDES(%(V)s)[3];

            // Compute dCdW
            //TODO-- see if this can be made faster by using ELEM_AT instead of ELEM5
            // dCdW[j,k,l,m,z] = sum_i sum_p sum_q sum_r dCdH[i,p,q,r,j]  *  V[i,dr*p+k,dc*q+l,dt*r+m,z]
            for (int j = 0; j < outputChannels; j++) {
                for (int z = 0; z < inputChannels; z++) {
                    for (int k = 0; k < filterHeight; k++) {
                        for (int l = 0; l < filterWidth; l++) {
                            for (int m = 0; m < filterDur; m++) {

                                //printf("writePos %%i %%i %%i %%i %%i \\n",j,k,l,m,z);

                                dtype_%(dCdW)s & writePos =  ELEM5(%(dCdW)s, j,k,l,m,z);
                                writePos = 0;
                                for (int i = 0; i < batchSize; i++) {
                                    for (int p = 0; p < outputHeight; p++) {
                                        for (int q = 0; q < outputWidth; q++) {
                                            int Hpos = i * PyArray_STRIDES(%(dCdH)s)[0] + j * PyArray_STRIDES(%(dCdH)s)[4] + p * PyArray_STRIDES(%(dCdH)s)[1] + q * PyArray_STRIDES(%(dCdH)s)[2] ;
                                            int Vpos = i * PyArray_STRIDES(%(V)s)[0] + z * PyArray_STRIDES(%(V)s)[4] +  (dr * p+k) * PyArray_STRIDES(%(V)s)[1] +  (dc*q+l) * PyArray_STRIDES(%(V)s)[2] + m * PyArray_STRIDES(%(V)s)[3];

                                            for (int r = 0; r < outputDur; r++) {
                                                writePos += ELEM5(%(dCdH)s,i,p,q,r,j) * ELEM5(%(V)s,i,dr*p+k,dc*q+l,dt*r+m,z);
                                                //writePos += ELEM_AT(%(dCdH)s,Hpos) * ELEM_AT(%(V)s,Vpos);
                                                Hpos += dhs3;
                                                Vpos += dtvs3;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }

}}}}}}} // extra scope for fail
            ///////////// < /code generated by ConvGradW3D >
        """

        return strutil.render_string(codeSource, locals())

开发者ID:amanrajdce，项目名称:Theano，代码行数:101，代码来源:ConvGrad3D.py

示例2: c_code


#.........这里部分代码省略.........
        codeSource += """
            {
              //General case code
              //std::cout << "general case code" << std::endl;
              long long Hpos = 0;
              long long Vpos = 0;
              for (int i = 0; i < batchSize; i++) {
                    long long Hposi = Hpos;
                    long long Vposi = Vpos;


                    for (int r = 0;  r < outputHeight; r++) {
                      long long Hposr = Hpos;
                      long long Vposr = Vpos;
                      for (int c = 0; c < outputWidth; c++) {
                       long long Hposc = Hpos;
                       long long Vposc = Vpos;
                       for (int t = 0; t < outputDur; t++) {
                            long long Hpost = Hpos;
                            long long Vpost = Vpos;
                            //of the loops so far, j should be the innermost, because
                            //each loop through j visits the same elements of V
                            //this implies that the last index of H should be the j index
                            //since V and H should have the same format, this means
                            //z should be the last index in v, and therefore the innermost
                            //of the next set of for loops

                            int Wpos = 0;
                            int bPos = 0;


                            for (int j = 0; j < outputChannels; j++) {


                                long long Hposj = Hpos;
                                long long Vposj = Vpos;
                                int Wposj = Wpos;

                                // H[i,r,c,t,j] = b[j]

                                dtype_%(H)s & writePos = ELEM_AT(%(H)s,Hpos);


                                writePos = ELEM_AT(%(b)s,bPos);


                                for (int k =0; k < filterHeight; k++) {
                                  int Wposk = Wpos;
                                  long long Vposk = Vpos;
                                  for (int l = 0; l < filterWidth; l++) {
                                    int Wposl = Wpos;
                                    long long Vposl = Vpos;
                                    for (int m = 0; m < filterDur; m++) {
                                      int Wposm = Wpos;
                                      long long Vposm = Vpos;
                                      for (int z = 0; z < inputChannels; z++) {
                                        //H[i,r,c,t,j] += W[j,z,k,l,m] * V[i,dr*r+k, dc*c+l, dt*t+m,z]


                                        writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(V)s,Vpos);

                                        Wpos += ws4;
                                        Vpos += vs4;
                                      } // close z
                                      Wpos = Wposm + ws3;
                                      Vpos = Vposm + vs3;
                                    } // close m
                                    Wpos = Wposl + ws2;
                                    Vpos = Vposl + vs2;
                                  } //close l
                                  Wpos = Wposk + PyArray_STRIDES(%(W)s)[1];
                                  Vpos = Vposk + PyArray_STRIDES(%(V)s)[1];
                                } //close k


                              bPos += bs;
                              Wpos = Wposj + ws0;
                              Hpos = Hposj +  hs4;
                              Vpos = Vposj;
                              //std::cout << "incremented Wpos by " << ws0 << std::endl;
                              //std::cout << "incremented Hpos by " << hs4 << std::endl;
                             } //close j
                             Hpos = Hpost + PyArray_STRIDES(%(H)s)[3];
                             Vpos = Vpost + vs3 * dt;
                         } //close t
                         Hpos = Hposc + PyArray_STRIDES(%(H)s)[2];
                         Vpos = Vposc + vs2 * dc;
                       } //close c
                       Hpos = Hposr + PyArray_STRIDES(%(H)s)[1];
                       Vpos = Vposr + PyArray_STRIDES(%(V)s)[1] * dr;
                   } //closes r
                   Hpos = Hposi + PyArray_STRIDES(%(H)s)[0];
                   Vpos = Vposi + PyArray_STRIDES(%(V)s)[0];
              } //closes i
            } //closes general case code
}}}}}}} //extra scope so error handler jumps don't cross declarations
            ///////////// < /code generated by Conv3D >
        """

        return strutil.render_string(codeSource,locals())

开发者ID:Snarfza，项目名称:Theano，代码行数:101，代码来源:Conv3D.py

示例3: c_code


#.........这里部分代码省略.........
                    videoWidth = RShape1;
                    videoDur = RShape2;
                }
            }

            //Allocate the reconstruction
            npy_intp dims[5];
            dims[0] = batchSize;
            dims[4] = inputChannels;
            dims[1] = videoHeight;
            dims[2] = videoWidth;
            dims[3] = videoDur;

                        if(!(%(R)s) || CudaNdarray_HOST_DIMS(%(R)s)[0]!=dims[0] ||
                        CudaNdarray_HOST_DIMS(%(R)s)[1]!=dims[1] ||
                        CudaNdarray_HOST_DIMS(%(R)s)[2]!=dims[2] ||
                        CudaNdarray_HOST_DIMS(%(R)s)[3]!=dims[3] ||
                        CudaNdarray_HOST_DIMS(%(R)s)[4]!=dims[4]){
                    Py_XDECREF(%(R)s);
               %(R)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
                if (!(%(R)s)) {
                    PyErr_Format(PyExc_MemoryError,"Could not allocate R");
                    %(fail)s;
                }
                        }
            cudaMemset(CudaNdarray_DEV_DATA(%(R)s), 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);

{ // for fail

bool out_contiguous = CudaNdarray_is_c_contiguous(%(R)s);
int version = -1;
int verbose = 0;
bool subsample =(dr>1)||(dc>1)||(dt>1);
bool b_strided = (CudaNdarray_HOST_STRIDES(%(b)s)[0]!=1) && !(CudaNdarray_HOST_STRIDES(%(b)s)[0]==0 && outputChannels==1);
printf("b stride0=%%d\\n",CudaNdarray_HOST_STRIDES(%(b)s)[0]);
bool work_complete = false;

const int ws4 = CudaNdarray_HOST_STRIDES(%(W)s)[4];
const int ws3 = CudaNdarray_HOST_STRIDES(%(W)s)[3];
const int ws2 = CudaNdarray_HOST_STRIDES(%(W)s)[2];
const int ws1 = CudaNdarray_HOST_STRIDES(%(W)s)[1];
const int ws0 = CudaNdarray_HOST_STRIDES(%(W)s)[0];
const int hs4 = CudaNdarray_HOST_STRIDES(%(H)s)[4];
const int hs3 = CudaNdarray_HOST_STRIDES(%(H)s)[3];
const int hs2 = CudaNdarray_HOST_STRIDES(%(H)s)[2];
const int hs1 = CudaNdarray_HOST_STRIDES(%(H)s)[1];
const int hs0 = CudaNdarray_HOST_STRIDES(%(H)s)[0];


if(out_contiguous && (version==0||version==-1) && outputDur<=512 && !work_complete){
    //conv_transp_rows_stack
    dim3 grid(batchSize * inputChannels, videoHeight * videoWidth);
    dim3 threads(videoDur);

HERE

    int shared_size=0;
        conv_transp_rows_stack<<<grid, threads, shared_size>>>(
        CudaNdarray_DEV_DATA(%(H)s), CudaNdarray_DEV_DATA(%(W)s), CudaNdarray_DEV_DATA(%(b)s), CudaNdarray_DEV_DATA(%(R)s),
        videoHeight, videoWidth, videoDur,
        filterHeight, filterWidth, filterDur,
        outputHeight, outputWidth, outputDur,
        outputChannels, inputChannels,
        dr,dc,dt,
        hs3,hs2,hs1,hs4,hs0,
        ws3,ws2,ws1,ws4,ws0,
        CudaNdarray_HOST_STRIDES(%(b)s)[0]);

        CNDA_THREAD_SYNC;
        cudaError_t sts = cudaGetLastError();
        if (cudaSuccess == sts)
        {
            work_complete = true;
        if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("INFO: used 'conv_transp_rows_stack' version\\n");
        }
        else
        {
            if (verbose) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("ERROR: all implementations failed for GpuConvTransp3D! (%%s)",cudaGetErrorString(sts));
            PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for GpuConvTransp3D! (%%s)",
                    cudaGetErrorString(sts));
            %(fail)s
        }


}

if(!work_complete){
            PyErr_Format(PyExc_RuntimeError, "ERROR: no implementations executed for this GpuConvTransp3D! out_contiguous=%%d b_strided=%%d outputDur=%%d",
                         out_contiguous,b_strided,outputDur);
            %(fail)s
}



}}}}}} // for fail
            ///////////// < /code generated by GpuConvTransp3D >
        """
        return strutil.render_string(codeSource, locals())

开发者ID:317070，项目名称:Theano，代码行数:101，代码来源:GpuConvTransp3D.py

示例4: c_code


#.........这里部分代码省略.........
                                    PyArray_DIMS(%(R)s)[4]!=dims[4])
                                   {
                                       Py_XDECREF(%(R)s);
                                       %(R)s = (PyArrayObject *) PyArray_SimpleNew(5, dims, PyArray_DESCR(%(H)s)->type_num);
                                       if (!(%(R)s)) {
                                           PyErr_Format(PyExc_MemoryError, "ConvTransp3D: could not allocate R");
                                           %(fail)s
                                       }
                                   }

                                   { // for fail 6

                                       #define ELEM5(x, i,j,k,l,m) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i)*PyArray_STRIDES(x)[0]+(j)*PyArray_STRIDES(x)[1]+(k)*PyArray_STRIDES(x)[2]+(l)*PyArray_STRIDES(x)[3]+(m)*PyArray_STRIDES(x)[4] )
                                       #define ELEM_AT(x, i) * ( dtype_ ## x *) ( PyArray_BYTES(x) + (i) )



                                       dtype_%(b)s * b = (dtype_%(b)s *) PyArray_DATA(%(b)s);

                                       int rs4 = PyArray_STRIDES(%(R)s)[4];
                                       int ws0 = PyArray_STRIDES(%(W)s)[0];
                                       int ws4 = PyArray_STRIDES(%(W)s)[4];
                                       int hs4 = PyArray_STRIDES(%(H)s)[4];

                                       // Compute R
                                       // R[i,r,c,t,j] = b_j + sum_{rc,rk | d \circ rc + rk = r} sum_{cc,ck | ...} sum_{tc,tk | ...} sum_k W[k, rk, ck, tk,j] * H[i,rc,cc,tc,k]

                                       for (int i = 0; i < batchSize; i++) {
                                        for (int r = 0; r < videoHeight; r++) {
                                         const int frc = (int)std::max(0.0f, ceilf(float(r-filterHeight+1)/float(dr)));
                                         for (int c = 0; c < videoWidth; c++) {
                                          const int fcc = (int)std::max(0.0f, ceilf(float(c-filterWidth +1)/float(dc)));
                                          for (int t = 0; t < videoDur; t++) {
                                           const int ftc = (int)std::max(0.0f, ceilf(float(t-filterDur +1)  /float(dt)));

                                           long long Rpost = i * PyArray_STRIDES(%(R)s)[0] + r * PyArray_STRIDES(%(R)s)[1] + c * PyArray_STRIDES(%(R)s)[2] + t * PyArray_STRIDES(%(R)s)[3];

                                           long long Rpos = Rpost;
                                           for (int j = 0; j < inputChannels; j++)
                                           {
                                            //ELEM5(%(R)s, i,r,c,t,j) = b[j];
                                            ELEM_AT(%(R)s,Rpos) = b[j];
                                            Rpos += rs4;
                                           }


                                           for (int rc = frc; rc < outputHeight; rc++) {
                                            const int rk = r - rc * dr;
                                            if (rk < 0) break;

                                            for (int cc = fcc; cc < outputWidth; cc++) {
                                             const int ck = c - cc * dc;
                                             if (ck < 0) break;

                                             for (int tc = ftc; tc < outputDur; tc++)
                                             {
                                              const int tk = t - tc * dt;
                                              if (tk < 0) break;

                                              int Wpos = rk * PyArray_STRIDES(%(W)s)[1] +  ck * PyArray_STRIDES(%(W)s)[2] + tk * PyArray_STRIDES(%(W)s)[3];
                                              int Hpostc = i * PyArray_STRIDES(%(H)s)[0] +      rc * PyArray_STRIDES(%(H)s)[1] +  cc * PyArray_STRIDES(%(H)s)[2] + tc * PyArray_STRIDES(%(H)s)[3];
                                              Rpos = Rpost;
                                              for (int j = 0; j < inputChannels; j++)
                                              {
                                               int Wposj = Wpos;
                                               dtype_%(R)s & writePos = ELEM_AT(%(R)s,Rpos);

                                               int Hpos = Hpostc;

                                               for (int k = 0; k < outputChannels; k++) {
                                                //TODO-- it's probably bad in terms of cache that our inner loop is over the largest stride of W.... maybe OK since it's the smallest stride of H
                                                //writePos += ELEM5(%(W)s,k,rk,ck,tk,j) * ELEM5(%(H)s,i,rc,cc,tc,k);
                                                //writePos += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);

                                                writePos  += ELEM_AT(%(W)s,Wpos) * ELEM_AT(%(H)s,Hpos);

                                                Wpos += ws0;
                                                Hpos += hs4;

                                               } //close the k loop
                                               Rpos += rs4;
                                               Wpos = Wposj +  ws4;
                                              } //close the j loop
                                             } // close the tc loop
                                            } //cc
                                           } //rc
                                          } //t
                                         } //c
                                        } //r
                                       } //i
                                   } //for fail 6
                               } //for fail 5
                           } //for fail 4
                       } //for fail 3
                   } //for fail 2
               } // for fail 1
               ///////////// < /code generated by ConvTransp3D >
                     """

        return strutil.render_string(codeSource, locals())

开发者ID:317070，项目名称:Theano，代码行数:101，代码来源:ConvTransp3D.py

示例5: c_code


#.........这里部分代码省略.........

        setup_nv_weights_grads = """
        int filters_dims[4];
        // filters:  (input channels, filter rows, filter cols, output channels)
        filters_dims[0] = img_channels;
        filters_dims[1] = imgSizeY - hidGradsSizeY + 1 - 2 * paddingStart;
        filters_dims[2] = imgSizeX - hidGradsSizeX + 1 - 2 * paddingStart;
        assert(filters_dims[1] == filters_dims[2]); // only square kernels are supported
        filters_dims[3] = numFilters;
        const int filterSize = filters_dims[1];
        int partialsum_storage_dims[5];
        for (int i = 1; i < 5; i++)
        {
            partialsum_storage_dims[i] = filters_dims[i - 1];
        }
        partialsum_storage_dims[0] = numModules / partialSum;
        CudaNdarray *partialsum_storage = NULL;
        if (partialSum != numModules &&
            CudaNdarray_prep_output(&partialsum_storage, 5,
                                    partialsum_storage_dims))
        {
            %(fail)s;
        }

        for (int i = 0; i < 4; i++)
        {
            if (filters_dims[i] <= 0)
            {
                printf("filters_dims[%%d] = %%d\\n", i, filters_dims[i]);
                assert(false);
            }
        }
        if (CudaNdarray_prep_output(& %(weights_grads)s, 4, filters_dims))
        {
            Py_DECREF(partialsum_storage);
            %(fail)s;
        }

        { // setup_nv_weights_grad brace # 1

        NVMatrix nv_weights_grads(%(weights_grads)s, filters_dims[0] * filterSize * filterSize, numFilters,
        "weight_acts:nv_weights_grads");

        """

        num_braces += 1

        # note: imgSizeX is not specified here, it is computed internally
        # (in _filterActsSparse) by the lines:
        # int imgPixels = images.getNumRows() / numImgColors;
        # int imgSizeX = imgPixels / imgSizeY;
        #
        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        run_kernel = """

        if (partialSum == numModules)
            _weightActs(nv_images, nv_hid_grads, nv_weights_grads,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
        else {
            NVMatrix nv_partialsum(partialsum_storage, (numModules / partialSum) *
                     filters_dims[0] * filterSize * filterSize, numFilters,
                     "weight_acts: nv_partialsum");
            _weightActs(nv_images, nv_hid_grads, nv_partialsum,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
            nv_partialsum.reshape((numModules / partialSum), filters_dims[0] * filterSize * filterSize * numFilters);

            // sum out axis 0 of nv_partialsum
            #define AXIS 0
            // scale the contents of nv_weights_grads by 0
            // i.e., clear out its pre-existing content
            #define SCALE_THIS 0
            // scale the new sum by 1, i.e., don't do any scaling
            #define SCALE_SUM 1
            nv_weights_grads.addSum(nv_partialsum, AXIS, SCALE_THIS, SCALE_SUM);

            Py_DECREF(partialsum_storage);
        }
        """

        braces = '}' * num_braces

        rval = (basic_setup +
                setup_nv_images +
                setup_nv_hid_grads +
                setup_nv_weights_grads +
                run_kernel +
                braces)

        rval = render_string(rval, locals())

        return rval

开发者ID:casperkaae，项目名称:pylearn2，代码行数:101，代码来源:weight_acts.py

示例6: c_code


#.........这里部分代码省略.........
                        if (dr <= 0 || dc <= 0 || dt <= 0)
                        {
                PyErr_Format(PyExc_ValueError, "GpuConv3D: Strides must all be positive but are %%i, %%i, %%i", dr, dc, dt);
                %(fail)s
                        }
{ // extra scope so fail works

                        //Make correctly sized output
                        const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
                        const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
                        const int outputDur = int( (vidDur - filterDur) / dt ) +1;

                        npy_intp dims[5];
                        dims[0] = batchSize;
                        dims[4] = outputChannels;
                        dims[1] = outputHeight;
                        dims[2] = outputWidth;
                        dims[3] = outputDur;

                        if(!(%(H)s) || CudaNdarray_HOST_DIMS(%(H)s)[0]!=dims[0] ||
                        CudaNdarray_HOST_DIMS(%(H)s)[1]!=dims[1] ||
                        CudaNdarray_HOST_DIMS(%(H)s)[2]!=dims[2] ||
                        CudaNdarray_HOST_DIMS(%(H)s)[3]!=dims[3] ||
                        CudaNdarray_HOST_DIMS(%(H)s)[4]!=dims[4]){
                                Py_XDECREF(%(H)s);
                                %(H)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);
                                if (!(%(H)s)) {

                    PyErr_Format(PyExc_MemoryError, "GpuConv3D: could not allocate output");
                            %(fail)s
                                }
                        }
{ // extra scope so fail will not cross declarations
                        //#define ELEM_AT(x, i) * ( dtype_ ## x *) ( x->data + (i) )####################

                        const int ws4 = CudaNdarray_HOST_STRIDES(%(W)s)[4];
                        const int vs4 = CudaNdarray_HOST_STRIDES(%(V)s)[4];
                        const int ws3 = CudaNdarray_HOST_STRIDES(%(W)s)[3];
                        const int vs3 = CudaNdarray_HOST_STRIDES(%(V)s)[3];
                        const int ws2 = CudaNdarray_HOST_STRIDES(%(W)s)[2];
                        const int vs2 = CudaNdarray_HOST_STRIDES(%(V)s)[2];
                        const int ws1 = CudaNdarray_HOST_STRIDES(%(W)s)[1];
                        const int vs1 = CudaNdarray_HOST_STRIDES(%(V)s)[1];
                        const int ws0 = CudaNdarray_HOST_STRIDES(%(W)s)[0];
                        const int vs0 = CudaNdarray_HOST_STRIDES(%(V)s)[0];

                        // Compute H
                        //H[i,x,y,t,j] = b_j + sum_k sum_l sum_m sum_z W[j,k,l,m,z] V[i, dr*r+k,dc*c+l,dt*t+m,z]

bool out_contiguous = CudaNdarray_is_c_contiguous(%(H)s);
int version = -1;
int verbose = 0;
bool subsample =(dr>1)||(dc>1)||(dt>1);
bool b_strided = (CudaNdarray_HOST_STRIDES(%(b)s)[0]!=1) && !(CudaNdarray_HOST_STRIDES(%(b)s)[0]==0 && outputChannels==1);
bool work_complete = false;

if(out_contiguous && !b_strided && (version==0||version==-1) && outputDur<=512 && !work_complete){
    //conv_rows_stack
    dim3 grid(outputHeight*outputWidth,batchSize*outputChannels);
    dim3 threads(outputDur);

    int shared_size=0;
        conv_rows_stack<<<grid, threads, shared_size>>>(
        CudaNdarray_DEV_DATA(%(V)s), CudaNdarray_DEV_DATA(%(W)s), CudaNdarray_DEV_DATA(%(b)s), CudaNdarray_DEV_DATA(%(H)s),
        vidHeight, vidWidth, vidDur,
        filterHeight, filterWidth, filterDur,
        outputChannels, inputChannels,
        dr,dc,dt,
        vs3,vs2,vs1,vs4,vs0,
        ws3,ws2,ws1,ws4,ws0);

        CNDA_THREAD_SYNC;
        cudaError_t sts = cudaGetLastError();
        if (cudaSuccess == sts)
        {
            work_complete = true;
            if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("INFO: used 'conv_rows_stack' version\\n");
        }
        else
        {
            if (verbose) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("ERROR: all implementations failed for GpuConv3D! (%%s)",cudaGetErrorString(sts));
            PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for GpuConv3D! (%%s)",
                    cudaGetErrorString(sts));
            %(fail)s
        }


}

if(!work_complete){
            PyErr_Format(PyExc_RuntimeError, "ERROR: no implementations executed for this GpuConv3D!");
            %(fail)s
}

}}}}}}} //extra scope so error handler jumps don't cross declarations
                        ///////////// < /code generated by GpuConv3D >
        """
        return strutil.render_string(codeSource, locals())

开发者ID:317070，项目名称:Theano，代码行数:101，代码来源:GpuConv3D.py

示例7: c_code


#.........这里部分代码省略.........


            //Compute correctl sized of output
            const int outputHeight = int( (vidHeight - filterHeight) / dr )+1;
            const int outputWidth = int( (vidWidth - filterWidth) / dc )+1;
            const int outputDur = int( (vidDur - filterDur) / dt ) +1;

            if (CudaNdarray_HOST_DIMS(%(dCdH)s)[0] != batchSize ||
                CudaNdarray_HOST_DIMS(%(dCdH)s)[4] != outputChannels ||
                CudaNdarray_HOST_DIMS(%(dCdH)s)[1] != outputHeight ||
                CudaNdarray_HOST_DIMS(%(dCdH)s)[2] != outputWidth ||
                CudaNdarray_HOST_DIMS(%(dCdH)s)[3] != outputDur)
            {
                PyErr_Format(PyExc_ValueError, "dCdH is the wrong size, expected (%%i,%%i,%%i,%%i,%%i), got (%%i,%%i,%%i,%%i,%%i)", batchSize, outputHeight, outputWidth, outputDur, outputChannels, CudaNdarray_HOST_DIMS(%(dCdH)s)[0], CudaNdarray_HOST_DIMS(%(dCdH)s)[1], CudaNdarray_HOST_DIMS(%(dCdH)s)[2] ,CudaNdarray_HOST_DIMS(%(dCdH)s)[3], CudaNdarray_HOST_DIMS(%(dCdH)s)[4] );
                %(fail)s
            }
{ // extra scope for fail

            npy_intp dims[5];
            dims[0] = outputChannels;
            dims[4] = inputChannels;
            dims[1] = filterHeight;
            dims[2] = filterWidth;
            dims[3] = filterDur;

            if(!(%(dCdW)s)  || CudaNdarray_HOST_DIMS(%(dCdW)s)[0]!=dims[0] ||
                  CudaNdarray_HOST_DIMS(%(dCdW)s)[1]!=dims[1] ||
                  CudaNdarray_HOST_DIMS(%(dCdW)s)[2]!=dims[2] ||
                  CudaNdarray_HOST_DIMS(%(dCdW)s)[3]!=dims[3] ||
                  CudaNdarray_HOST_DIMS(%(dCdW)s)[4]!=dims[4] ){
               Py_XDECREF(%(dCdW)s);
               %(dCdW)s = (CudaNdarray*)CudaNdarray_NewDims(5,dims);

               if (!(%(dCdW)s)) {
                PyErr_Format(PyExc_MemoryError, "GpuConvGrad3D: Could not allocated dCdW");
                %(fail)s
               }
            }
{ //for fail
            const int dcdhs4 = CudaNdarray_HOST_STRIDES(%(dCdH)s)[4];
            const int dcdhs3 = CudaNdarray_HOST_STRIDES(%(dCdH)s)[3];
            const int dcdhs1 = CudaNdarray_HOST_STRIDES(%(dCdH)s)[1];
            const int dcdhs2 = CudaNdarray_HOST_STRIDES(%(dCdH)s)[2];
            const int dcdhs0 = CudaNdarray_HOST_STRIDES(%(dCdH)s)[0];
            const int vs4 = CudaNdarray_HOST_STRIDES(%(V)s)[4];
            const int vs3 = CudaNdarray_HOST_STRIDES(%(V)s)[3];
            const int vs2 = CudaNdarray_HOST_STRIDES(%(V)s)[2];
            const int vs1 = CudaNdarray_HOST_STRIDES(%(V)s)[1];
            const int vs0 = CudaNdarray_HOST_STRIDES(%(V)s)[0];

bool out_contiguous = CudaNdarray_is_c_contiguous(%(dCdW)s);
int version = -1;
int verbose = 0;
bool subsample =(dr>1)||(dc>1)||(dt>1);
bool work_complete = false;
if(out_contiguous && (version==0||version==-1) && WShape[4]<=512 && !work_complete){
    //conv_rows_stack
    dim3 grid(WShape[0]*WShape[4],WShape[1]*WShape[2]);//outputHeight*outputWidth);
    dim3 threads(WShape[3]);

    int shared_size=0;

        convgrad_rows_stack<<<grid, threads, shared_size>>>(
        CudaNdarray_DEV_DATA(%(V)s), CudaNdarray_DEV_DATA(%(dCdH)s), CudaNdarray_DEV_DATA(%(dCdW)s),
        vidHeight, vidWidth, vidDur,
        filterHeight, filterWidth, filterDur,
        WShape[0], WShape[1], WShape[2], WShape[3], WShape[4],
        outputHeight,outputWidth,outputDur,
        batchSize, outputChannels, inputChannels,
        dr,dc,dt,
        vs3,vs2,vs1,vs4,vs0,
        dcdhs3,dcdhs2,dcdhs1,dcdhs4,dcdhs0);

        CNDA_THREAD_SYNC;
        cudaError_t sts = cudaGetLastError();
        if (cudaSuccess == sts)
        {
            work_complete = true;
            if (verbose>1) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("INFO: used 'conv_rows_stack' version\\n");
        }
        else
        {
            if (verbose) printf("threads.x=%%i, threads.y=%%i, grid.x=%%i, grid.y=%%i, shared_size=%%i, nb_threads=%%i\\n", threads.x, threads.y, grid.x, grid.y, shared_size, threads.x * threads.y);
            if (verbose) printf("ERROR: all implementations failed for GpuConv3D! (%%s)",cudaGetErrorString(sts));
            PyErr_Format(PyExc_RuntimeError, "ERROR: all implementations failed for GpuConvGrad3D! (%%s)",
                    cudaGetErrorString(sts));
            %(fail)s
        }

}
if(!work_complete){
            PyErr_Format(PyExc_RuntimeError, "ERROR: no implementations executed for this GpuConv3D!");
            %(fail)s
}
}}}}} // extra scope for fail
            ///////////// < /code generated by GpuConvGrad3D >
        """

        return strutil.render_string(codeSource, locals())

开发者ID:5730279821-TA，项目名称:Theano，代码行数:101，代码来源:GpuConvGrad3D.py

示例8: c_code


#.........这里部分代码省略.........
            PyErr_Format(PyExc_ValueError,
            "filter must be square, but have shape (%%d, %%d).",
            filters_dims[1], filters_dims[2]);
            %(fail)s;
        }
        else if (moduleStride > filters_dims[1]) {
            PyErr_Format(PyExc_ValueError,
            "stride %%d greater than filter size (%%d, %%d)",
            moduleStride, filters_dims[1], filters_dims[2]);
            %(fail)s;
        }
        filters_dims[3] = numFilters;
        const int filterSize = filters_dims[1];
        int partialsum_storage_dims[5];
        for (int i = 1; i < 5; i++)
        {
            partialsum_storage_dims[i] = filters_dims[i - 1];
        }
        partialsum_storage_dims[0] = numModules / partialSum;
        if (partialSum != numModules &&
            CudaNdarray_prep_output(&%(partialsum_storage)s, 5,
                                    partialsum_storage_dims))
        {
            %(fail)s;
        }

        for (int i = 0; i < 4; i++)
        {
            if (filters_dims[i] <= 0)
            {
                printf("filters_dims[%%d] = %%d\\n", i, filters_dims[i]);
                assert(false);
            }
        }
        if (CudaNdarray_prep_output(& %(weights_grads)s, 4, filters_dims))
        {
            %(fail)s;
        }

        { // setup_nv_weights_grad brace # 1

        NVMatrix nv_weights_grads(%(weights_grads)s, filters_dims[0] * filterSize
                                  * filterSize, numFilters,
                                  "weight_acts:nv_weights_grads");

        """

        num_braces += 1

        # note: imgSizeX is not specified here, it is computed internally
        # (in _filterActsSparse) by the lines:
        # int imgPixels = images.getNumRows() / numImgColors;
        # int imgSizeX = imgPixels / imgSizeY;
        #
        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        run_kernel = """

        if (partialSum == numModules)
            _weightActs(nv_images, nv_hid_grads, nv_weights_grads,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
        else {
            NVMatrix nv_partialsum(%(partialsum_storage)s, (numModules / partialSum) *
                     filters_dims[0] * filterSize * filterSize, numFilters,
                     "weight_acts: nv_partialsum");
            _weightActs(nv_images, nv_hid_grads, nv_partialsum,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
            nv_partialsum.reshape((numModules / partialSum), filters_dims[0] * filterSize * filterSize * numFilters);

            // sum out axis 0 of nv_partialsum
            #define AXIS 0
            // scale the contents of nv_weights_grads by 0
            // i.e., clear out its pre-existing content
            #define SCALE_THIS 0
            // scale the new sum by 1, i.e., don't do any scaling
            #define SCALE_SUM 1
            nv_weights_grads.addSum(nv_partialsum, AXIS, SCALE_THIS, SCALE_SUM);
        }
        """

        braces = '}' * num_braces

        rval = (basic_setup +
                setup_nv_images +
                setup_nv_hid_grads +
                setup_nv_weights_grads +
                run_kernel +
                braces)

        rval = render_string(rval, locals())

        return rval

开发者ID:BloodNg，项目名称:pylearn2，代码行数:101，代码来源:weight_acts.py

注：本文中的theano.misc.strutil.render_string函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。