C++ Matrix2D::GetFlatData方法代码示例

本文整理汇总了C++中Matrix2D::GetFlatData方法的典型用法代码示例。如果您正苦于以下问题：C++ Matrix2D::GetFlatData方法的具体用法？C++ Matrix2D::GetFlatData怎么用？C++ Matrix2D::GetFlatData使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Matrix2D的用法示例。

在下文中一共展示了Matrix2D::GetFlatData方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: MICStencil<T>::operator()

template <class T> void
MICStencil<T>::operator()( Matrix2D<T>& mtx, unsigned int nIters )
{
    unsigned int uDimWithHalo    = mtx.GetNumRows();
    unsigned int uHaloWidth      = LINESIZE / sizeof(T);
    unsigned int uImgElements    = uDimWithHalo * uDimWithHalo;

    __declspec(target(mic), align(LINESIZE)) T* pIn = mtx.GetFlatData();

    __declspec(target(mic), align(sizeof(T)))    T wcenter      = this->wCenter;
    __declspec(target(mic), align(sizeof(T)))    T wdiag        = this->wDiagonal;
    __declspec(target(mic), align(sizeof(T)))    T wcardinal    = this->wCardinal;

    #pragma offload target(mic) in(pIn:length(uImgElements) ALLOC RETAIN)
    {
        // Just copy pIn to compute the copy transfer time
    }

    #pragma offload target(mic) in(pIn:length(uImgElements) REUSE RETAIN)    \
                                in(uImgElements) in(uDimWithHalo)            \
                                in(wcenter) in(wdiag) in(wcardinal)
    {
        unsigned int uRowPartitions = sysconf(_SC_NPROCESSORS_ONLN) / 4 - 1;
        unsigned int uColPartitions = 4;    // Threads per core for KNC

        unsigned int uRowTileSize    = (uDimWithHalo - 2 * uHaloWidth) / uRowPartitions;
        unsigned int uColTileSize    = (uDimWithHalo - 2 * uHaloWidth) / uColPartitions;

        uRowTileSize = ((uDimWithHalo - 2 * uHaloWidth) % uRowPartitions > 0) ? (uRowTileSize + 1) : (uRowTileSize);

        // Should use the "Halo Val" when filling the memory space
        T *pTmp     = (T*)pIn;
        T *pCrnt = (T*)memset((T*)_mm_malloc(uImgElements * sizeof(T), LINESIZE), 0, uImgElements * sizeof(T));

        #pragma omp parallel firstprivate(pTmp, pCrnt, uRowTileSize, uColTileSize, uHaloWidth, uDimWithHalo)
        {
            unsigned int uThreadId = omp_get_thread_num();

            unsigned int uRowTileId = uThreadId / uColPartitions;
            unsigned int uColTileId = uThreadId % uColPartitions;

            unsigned int uStartLine = uRowTileId * uRowTileSize + uHaloWidth;
            unsigned int uStartCol  = uColTileId * uColTileSize + uHaloWidth;

            unsigned int uEndLine = uStartLine + uRowTileSize;
            uEndLine = (uEndLine > (uDimWithHalo - uHaloWidth)) ? uDimWithHalo - uHaloWidth : uEndLine;

            unsigned int uEndCol    = uStartCol  + uColTileSize;
            uEndCol  = (uEndCol  > (uDimWithHalo - uHaloWidth)) ? uDimWithHalo - uHaloWidth : uEndCol;

            T    cardinal0 = 0.0;
            T    diagonal0 = 0.0;
            T    center0   = 0.0;

            unsigned int cntIterations, i, j;

            for (cntIterations = 0; cntIterations < nIters; cntIterations ++)
            {
                // Do Stencil Operation
                for (i = uStartLine; i < uEndLine; i++)
                {
                    T * pCenter      = &pTmp [ i * uDimWithHalo];
                    T * pTop         = pCenter - uDimWithHalo;
                    T * pBottom      = pCenter + uDimWithHalo;
                    T * pOut         = &pCrnt[ i * uDimWithHalo];

                    __assume_aligned(pCenter, 64);
                    __assume_aligned(pTop,    64);
                    __assume_aligned(pBottom, 64);
                    __assume_aligned(pOut,    64);

                    #pragma simd vectorlengthfor(float)
                    for (j = uStartCol; j < uEndCol; j++)
                    {
                        cardinal0   = pCenter[j - 1] + pCenter[j + 1] + pTop[j] + pBottom[j];
                        diagonal0   = pTop[j - 1] + pTop[j + 1] + pBottom[j - 1] + pBottom[j + 1];
                        center0     = pCenter[j];

                        pOut[j]     = wcardinal * cardinal0 + wdiag * diagonal0 + wcenter * center0;
                    }
                }

                #pragma omp barrier
                ;

                // Switch pointers
                T* pAux    = pTmp;
                pTmp     = pCrnt;
                pCrnt    = pAux;
            } // End For

        } // End Parallel

        _mm_free(pCrnt);
    } // End Offload

    #pragma offload target(mic) out(pIn:length(uImgElements) REUSE FREE)
    {
        // Just copy back pIn
    }
//.........这里部分代码省略.........

开发者ID:ellen-hl，项目名称:shoc-mic，代码行数:101，代码来源:MICStencilKernel.cpp

示例2: MPICUDAStencil<T>::DoPreIterationWork

void
MPICUDAStencil<T>::DoPreIterationWork( T* currBuf,  // in device global memory
                                       T* altBuf,  // in device global memory
                                       Matrix2D<T>& mtx,
                                       unsigned int iter )
{
    // do the halo exchange at desired frequency
    // note that we *do not* do the halo exchange here before the
    // first iteration, because we did it already (before we first
    // pushed the data onto the device) in our operator() method.
    unsigned int haloWidth = this->GetNumberIterationsPerHaloExchange();
    if( (iter > 0) && (iter % haloWidth) == 0 )
    {
        unsigned int nRows = mtx.GetNumRows();
        unsigned int nCols = mtx.GetNumColumns();
        unsigned int nPaddedCols = mtx.GetNumPaddedColumns();
        T* flatData = mtx.GetFlatData();

        size_t nsDataItemCount = haloWidth * nPaddedCols;
        size_t ewDataItemCount = haloWidth * nRows;
        size_t nsDataSize = nsDataItemCount * sizeof(T);
        size_t ewDataSize = ewDataItemCount * sizeof(T);

        //
        // read current data off device
        // we only read halo, and only for sides where we have a neighbor
        //
        if( this->HaveNorthNeighbor() )
        {
            // north data is contiguous - copy directly into matrix
            cudaMemcpy( flatData + (haloWidth * nPaddedCols),   // dest
                        currBuf + (haloWidth * nPaddedCols),     // src
                        nsDataSize,                 // amount to transfer
                        cudaMemcpyDeviceToHost );   // direction
        }

        if( this->HaveSouthNeighbor() )
        {
            // south data is contiguous - copy directly into matrix
            cudaMemcpy( flatData + ((nRows - 2*haloWidth)*nPaddedCols),   // dest
                        currBuf + ((nRows - 2*haloWidth)*nPaddedCols),    // src
                        nsDataSize,                 // amount to transfer
                        cudaMemcpyDeviceToHost );   // direction
        }

        if( this->HaveEastNeighbor() )
        {
            // east data is non-contiguous - but CUDA has a strided read
            cudaMemcpy2D( flatData + (nCols - 2*haloWidth), // dest
                          nPaddedCols * sizeof(T),        // dest pitch
                          currBuf + (nCols - 2*haloWidth),    // src
                          nPaddedCols * sizeof(T),        // src pitch
                          haloWidth * sizeof(T),          // width of data to transfer (bytes)
                          nRows,                          // height of data to transfer (rows)
                          cudaMemcpyDeviceToHost );       // transfer direction
        }

        if( this->HaveWestNeighbor() )
        {
            // west data is non-contiguous - but CUDA has a strided read
            cudaMemcpy2D( flatData + haloWidth,         // dest
                          nPaddedCols * sizeof(T),    // dest pitch
                          currBuf + haloWidth,        // src
                          nPaddedCols * sizeof(T),    // src pitch
                          haloWidth * sizeof(T),      // width of data to transfer (bytes)
                          nRows,          // height of data to transfer (rows)
                          cudaMemcpyDeviceToHost );   // transfer direction

        }


        //
        // do the actual halo exchange
        //
        if( dumpData )
        {
            DumpData( ofs, mtx, "before halo exchange" );
        }
        DoHaloExchange( mtx );
        if( dumpData )
        {
            DumpData( ofs, mtx, "after halo exchange" );
        }

        //
        // push updated data back onto device
        // we only write halo, and only for sides where we have a neighbor
        //
        if( this->HaveNorthNeighbor() )
        {
            // north data is contiguous - copy directly from matrix
            cudaMemcpy( currBuf,                    // dest
                        flatData,                   // src
                        nsDataSize,                 // amount to transfer
                        cudaMemcpyHostToDevice );   // direction
        }

        if( this->HaveSouthNeighbor() )
        {
            // south data is contiguous - copy directly from matrix
//.........这里部分代码省略.........

开发者ID:tositrino，项目名称:shoc，代码行数:101，代码来源:MPICUDAStencil.cpp

注：本文中的Matrix2D::GetFlatData方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。