本文整理汇总了C++中Matrix2D::GetNumPaddedColumns方法的典型用法代码示例。如果您正苦于以下问题：C++ Matrix2D::GetNumPaddedColumns方法的具体用法？C++ Matrix2D::GetNumPaddedColumns怎么用？C++ Matrix2D::GetNumPaddedColumns使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Matrix2D的用法示例。
在下文中一共展示了Matrix2D::GetNumPaddedColumns方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: MPICUDAStencil<T>::DoPreIterationWork
void
MPICUDAStencil<T>::DoPreIterationWork( T* currBuf, // in device global memory
T* altBuf, // in device global memory
Matrix2D<T>& mtx,
unsigned int iter )
{
// do the halo exchange at desired frequency
// note that we *do not* do the halo exchange here before the
// first iteration, because we did it already (before we first
// pushed the data onto the device) in our operator() method.
unsigned int haloWidth = this->GetNumberIterationsPerHaloExchange();
if( (iter > 0) && (iter % haloWidth) == 0 )
{
unsigned int nRows = mtx.GetNumRows();
unsigned int nCols = mtx.GetNumColumns();
unsigned int nPaddedCols = mtx.GetNumPaddedColumns();
T* flatData = mtx.GetFlatData();
size_t nsDataItemCount = haloWidth * nPaddedCols;
size_t ewDataItemCount = haloWidth * nRows;
size_t nsDataSize = nsDataItemCount * sizeof(T);
size_t ewDataSize = ewDataItemCount * sizeof(T);
//
// read current data off device
// we only read halo, and only for sides where we have a neighbor
//
if( this->HaveNorthNeighbor() )
{
// north data is contiguous - copy directly into matrix
cudaMemcpy( flatData + (haloWidth * nPaddedCols), // dest
currBuf + (haloWidth * nPaddedCols), // src
nsDataSize, // amount to transfer
cudaMemcpyDeviceToHost ); // direction
}
if( this->HaveSouthNeighbor() )
{
// south data is contiguous - copy directly into matrix
cudaMemcpy( flatData + ((nRows - 2*haloWidth)*nPaddedCols), // dest
currBuf + ((nRows - 2*haloWidth)*nPaddedCols), // src
nsDataSize, // amount to transfer
cudaMemcpyDeviceToHost ); // direction
}
if( this->HaveEastNeighbor() )
{
// east data is non-contiguous - but CUDA has a strided read
cudaMemcpy2D( flatData + (nCols - 2*haloWidth), // dest
nPaddedCols * sizeof(T), // dest pitch
currBuf + (nCols - 2*haloWidth), // src
nPaddedCols * sizeof(T), // src pitch
haloWidth * sizeof(T), // width of data to transfer (bytes)
nRows, // height of data to transfer (rows)
cudaMemcpyDeviceToHost ); // transfer direction
}
if( this->HaveWestNeighbor() )
{
// west data is non-contiguous - but CUDA has a strided read
cudaMemcpy2D( flatData + haloWidth, // dest
nPaddedCols * sizeof(T), // dest pitch
currBuf + haloWidth, // src
nPaddedCols * sizeof(T), // src pitch
haloWidth * sizeof(T), // width of data to transfer (bytes)
nRows, // height of data to transfer (rows)
cudaMemcpyDeviceToHost ); // transfer direction
}
//
// do the actual halo exchange
//
if( dumpData )
{
DumpData( ofs, mtx, "before halo exchange" );
}
DoHaloExchange( mtx );
if( dumpData )
{
DumpData( ofs, mtx, "after halo exchange" );
}
//
// push updated data back onto device
// we only write halo, and only for sides where we have a neighbor
//
if( this->HaveNorthNeighbor() )
{
// north data is contiguous - copy directly from matrix
cudaMemcpy( currBuf, // dest
flatData, // src
nsDataSize, // amount to transfer
cudaMemcpyHostToDevice ); // direction
}
if( this->HaveSouthNeighbor() )
{
// south data is contiguous - copy directly from matrix
//.........这里部分代码省略.........