本文整理汇总了C++中DistMatrix::LockedBuffer方法的典型用法代码示例。如果您正苦于以下问题：C++ DistMatrix::LockedBuffer方法的具体用法？C++ DistMatrix::LockedBuffer怎么用？C++ DistMatrix::LockedBuffer使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DistMatrix的用法示例。
在下文中一共展示了DistMatrix::LockedBuffer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: entry
// Assigns a [* ,* ] matrix A to this [MD,* ] matrix.
//
// This matrix is resized to the dimensions of A.  Processes that do not
// participate return right after the resize.  A participating process fills
// local row iLoc of every column with row (ColShift() + iLoc*LCM) of A's
// buffer.
//
// Returns a reference to this matrix.
const DistMatrix<T,MD,STAR>&
DistMatrix<T,MD,STAR>::operator=( const DistMatrix<T,STAR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[MD,* ] = [* ,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    this->ResizeTo( A.Height(), A.Width() );
    if( this->Participating() )
    {
        const Int rowStep = this->grid_->LCM();
        const Int firstRow = this->ColShift();
        const Int numCols = this->Width();
        const Int numLocalRows = this->LocalHeight();

        const T* src = A.LockedBuffer();
        const Int srcLDim = A.LDim();
        T* dst = this->Buffer();
        const Int dstLDim = this->LDim();
        PARALLEL_FOR
        for( Int col=0; col<numCols; ++col )
        {
            T* dstCol = &dst[col*dstLDim];
            // Start at the first row of A owned by this process
            const T* srcCol = &src[firstRow+col*srcLDim];
            for( Int r=0; r<numLocalRows; ++r )
                dstCol[r] = srcCol[r*rowStep];
        }
    }
    return *this;
}
示例2: AssertSameGrids
void Scatter
( const DistMatrix<T,CIRC,CIRC>& A,
DistMatrix<T,STAR,STAR>& B )
{
DEBUG_CSE
AssertSameGrids( A, B );
const Int height = A.Height();
const Int width = A.Width();
B.Resize( height, width );
if( B.Participating() )
{
const Int pkgSize = mpi::Pad( height*width );
vector<T> buffer;
FastResize( buffer, pkgSize );
// Pack
if( A.Participating() )
util::InterleaveMatrix
( height, width,
A.LockedBuffer(), 1, A.LDim(),
buffer.data(), 1, height );
// Broadcast from the process that packed
mpi::Broadcast( buffer.data(), pkgSize, A.Root(), A.CrossComm() );
// Unpack
util::InterleaveMatrix
( height, width,
buffer.data(), 1, height,
B.Buffer(), 1, B.LDim() );
}
}
示例3: entry
// Assigns a [VR,* ] matrix A to this [* ,* ] matrix.
//
// This matrix is resized to the dimensions of A; non-participating processes
// return right after the resize.  Each participating process packs its local
// columns of A contiguously, the packed portions are exchanged with an
// AllGather over the grid's VR communicator, and portion k is unpacked into
// rows Shift_(k,colAlignment,p), +p, +2p, ... of the local buffer.
//
// Returns a reference to this matrix.
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,VR,STAR>& A )
{
#ifndef RELEASE
    CallStackEntry entry("[* ,* ] = [VR,* ]");
    this->AssertNotLocked();
    this->AssertSameGrid( A.Grid() );
#endif
    const elem::Grid& grid = this->Grid();
    this->ResizeTo( A.Height(), A.Width() );
    if( !this->Participating() )
        return *this;

    const Int numProcs = grid.Size();
    const Int m = this->Height();
    const Int n = this->Width();
    const Int mLocalA = A.LocalHeight();
    const Int mLocalMax = MaxLength(m,numProcs);
    const Int chunk = mpi::Pad( mLocalMax*n );

    // One outgoing chunk followed by numProcs incoming chunks
    T* workspace = this->auxMemory_.Require( (numProcs+1)*chunk );
    T* outgoing = &workspace[0];
    T* incoming = &workspace[chunk];

    // Pack
    const Int ldA = A.LDim();
    const T* bufA = A.LockedBuffer();
    PARALLEL_FOR
    for( Int col=0; col<n; ++col )
        MemCopy( &outgoing[col*mLocalA], &bufA[col*ldA], mLocalA );

    // Communicate
    mpi::AllGather( outgoing, chunk, incoming, chunk, grid.VRComm() );

    // Unpack
    T* bufThis = this->Buffer();
    const Int ldThis = this->LDim();
    const Int alignA = A.ColAlignment();
    OUTER_PARALLEL_FOR
    for( Int rank=0; rank<numProcs; ++rank )
    {
        const T* portion = &incoming[rank*chunk];
        const Int shift = Shift_( rank, alignA, numProcs );
        const Int mLocal = Length_( m, shift, numProcs );
        INNER_PARALLEL_FOR
        for( Int col=0; col<n; ++col )
        {
            T* dstCol = &bufThis[shift+col*ldThis];
            const T* srcCol = &portion[col*mLocal];
            for( Int r=0; r<mLocal; ++r )
                dstCol[r*numProcs] = srcCol[r];
        }
    }
    this->auxMemory_.Release();
    return *this;
}
示例4: AssertSameGrids
void AllGather
( const DistMatrix<T, U, V >& A,
DistMatrix<T,Collect<U>(),Collect<V>()>& B )
{
EL_DEBUG_CSE
AssertSameGrids( A, B );
const Int height = A.Height();
const Int width = A.Width();
B.SetGrid( A.Grid() );
B.Resize( height, width );
if( A.Participating() )
{
if( A.DistSize() == 1 )
{
Copy( A.LockedMatrix(), B.Matrix() );
}
else
{
const Int colStride = A.ColStride();
const Int rowStride = A.RowStride();
const Int distStride = colStride*rowStride;
const Int maxLocalHeight = MaxLength(height,colStride);
const Int maxLocalWidth = MaxLength(width,rowStride);
const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
vector<T> buf;
FastResize( buf, (distStride+1)*portionSize );
T* sendBuf = &buf[0];
T* recvBuf = &buf[portionSize];
// Pack
util::InterleaveMatrix
( A.LocalHeight(), A.LocalWidth(),
A.LockedBuffer(), 1, A.LDim(),
sendBuf, 1, A.LocalHeight() );
// Communicate
mpi::AllGather
( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );
// Unpack
util::StridedUnpack
( height, width,
A.ColAlign(), colStride,
A.RowAlign(), rowStride,
recvBuf, portionSize,
B.Buffer(), B.LDim() );
}
}
if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
El::Broadcast( B, A.CrossComm(), A.Root() );
}
示例5: entry
// Computes singular values s and matrices U and V for the matrix A, whose
// `uplo` triangle is passed on to the Hermitian routines.
//
// With HAVE_PMRRR: HermitianEig fills s and V; the values in s are replaced
// by their absolute values, and U is a copy of V in which every column whose
// (pre-absolute-value) entry of s is not >= 0 is negated.
// Without HAVE_PMRRR: A is copied into U, made explicitly Hermitian, and a
// general SVD is run on it.
inline void HermitianSVD
( UpperOrLower uplo, DistMatrix<F>& A,
  DistMatrix<BASE(F),VR,STAR>& s, DistMatrix<F>& U, DistMatrix<F>& V )
{
#ifndef RELEASE
    CallStackEntry entry("HermitianSVD");
#endif
#ifdef HAVE_PMRRR
    typedef BASE(F) R;

    // Grab an eigenvalue decomposition of A
    HermitianEig( uplo, A, s, V );

    // Keep a signed copy of the eigenvalues in an [MR,* ] distribution
    // aligned with V; it is read per local column of V below
    const Grid& grid = A.Grid();
    DistMatrix<R,MR,STAR> signedVals( grid );
    signedVals.AlignWith( V.DistData() );
    signedVals = s;

    // Set the singular values to the absolute value of the eigenvalues
    const Int numLocalVals = s.LocalHeight();
    for( Int k=0; k<numLocalVals; ++k )
        s.SetLocal( k, 0, Abs(s.GetLocal(k,0)) );

    // Copy V into U (flipping the sign as necessary)
    U.AlignWith( V );
    U.ResizeTo( V.Height(), V.Width() );
    const Int mLocal = V.LocalHeight();
    const Int nLocal = V.LocalWidth();
    for( Int jLoc=0; jLoc<nLocal; ++jLoc )
    {
        const R sigma = signedVals.GetLocal( jLoc, 0 );
        F* dstCol = U.Buffer( 0, jLoc );
        const F* srcCol = V.LockedBuffer( 0, jLoc );
        // Written as (sigma >= 0) so that anything else, including NaN,
        // takes the negating path
        const bool keepSign = ( sigma >= 0 );
        for( Int iLoc=0; iLoc<mLocal; ++iLoc )
        {
            if( keepSign )
                dstCol[iLoc] = srcCol[iLoc];
            else
                dstCol[iLoc] = -srcCol[iLoc];
        }
    }
#else
    U = A;
    MakeHermitian( uplo, U );
    SVD( U, s, V );
#endif // ifdef HAVE_PMRRR
}
示例6: AssertSameGrids
void Filter
( const DistMatrix<T,Collect<U>(),Collect<V>()>& A,
DistMatrix<T, U, V >& B )
{
DEBUG_CSE
AssertSameGrids( A, B );
B.Resize( A.Height(), A.Width() );
if( !B.Participating() )
return;
const Int colShift = B.ColShift();
const Int rowShift = B.RowShift();
util::InterleaveMatrix
( B.LocalHeight(), B.LocalWidth(),
A.LockedBuffer(colShift,rowShift), B.ColStride(), B.RowStride()*A.LDim(),
B.Buffer(), 1, B.LDim() );
}
示例7: dProx
void GetMappedDiagonal
( const DistMatrix<T,U,V>& A,
AbstractDistMatrix<S>& dPre,
function<S(const T&)> func,
Int offset )
{
EL_DEBUG_CSE
EL_DEBUG_ONLY(AssertSameGrids( A, dPre ))
ElementalProxyCtrl ctrl;
ctrl.colConstrain = true;
ctrl.colAlign = A.DiagonalAlign(offset);
ctrl.rootConstrain = true;
ctrl.root = A.DiagonalRoot(offset);
DistMatrixWriteProxy<S,S,DiagCol<U,V>(),DiagRow<U,V>()> dProx( dPre, ctrl );
auto& d = dProx.Get();
d.Resize( A.DiagonalLength(offset), 1 );
if( d.Participating() )
{
const Int diagShift = d.ColShift();
const Int iStart = diagShift + Max(-offset,0);
const Int jStart = diagShift + Max( offset,0);
const Int colStride = A.ColStride();
const Int rowStride = A.RowStride();
const Int iLocStart = (iStart-A.ColShift()) / colStride;
const Int jLocStart = (jStart-A.RowShift()) / rowStride;
const Int iLocStride = d.ColStride() / colStride;
const Int jLocStride = d.ColStride() / rowStride;
const Int localDiagLength = d.LocalHeight();
S* dBuf = d.Buffer();
const T* ABuf = A.LockedBuffer();
const Int ldim = A.LDim();
EL_PARALLEL_FOR
for( Int k=0; k<localDiagLength; ++k )
{
const Int iLoc = iLocStart + k*iLocStride;
const Int jLoc = jLocStart + k*jLocStride;
dBuf[k] = func(ABuf[iLoc+jLoc*ldim]);
}
}
}
示例8: PushCallStack
inline void AddInLocalData
( const DistMatrix<F,VC,STAR>& X1, DistMatrix<F,STAR,STAR>& Z )
{
#ifndef RELEASE
PushCallStack("internal::AddInLocalData");
#endif
const int width = X1.Width();
const int localHeight = X1.LocalHeight();
const int stride = X1.Grid().Size();
const int offset = X1.ColShift();
for( int j=0; j<width; ++j )
{
F* ZColBuffer = Z.Buffer(0,j);
const F* X1ColBuffer = X1.LockedBuffer(0,j);
for( int iLocal=0; iLocal<localHeight; ++iLocal )
ZColBuffer[offset+stride*iLocal] += X1ColBuffer[iLocal];
}
#ifndef RELEASE
PopCallStack();
#endif
}
示例9: AccumulateRHS
void AccumulateRHS( const DistMatrix<F,VC,STAR>& X, DistMatrix<F,STAR,STAR>& Z )
{
const Int height = X.Height();
const Int width = X.Width();
Z.Empty();
Zeros( Z, height, width );
const Int localHeight = X.LocalHeight();
const Int colShift = X.ColShift();
const int commSize = X.Grid().Size();
const F* XBuffer = X.LockedBuffer();
F* ZBuffer = Z.Buffer();
const Int XLDim = X.LDim();
const Int ZLDim = Z.LDim();
for( Int iLoc=0; iLoc<localHeight; ++iLoc )
{
const Int i = colShift + iLoc*commSize;
for( Int j=0; j<width; ++j )
ZBuffer[i+j*ZLDim] = XBuffer[iLoc+j*XLDim];
}
mpi::AllReduce( ZBuffer, ZLDim*width, mpi::SUM, X.Grid().VCComm() );
}
示例10: Blocksize
void FormDiagonalBlocks
( const DistMatrix<F,VC,STAR>& L, DistMatrix<F,STAR,STAR>& D, bool conjugate )
{
const Grid& g = L.Grid();
const Int height = L.Width();
const Int blocksize = Blocksize();
const int commRank = g.VCRank();
const int commSize = g.Size();
const Int localHeight = Length(height,commRank,commSize);
const Int maxLocalHeight = MaxLength(height,commSize);
const Int portionSize = maxLocalHeight*blocksize;
std::vector<F> sendBuffer( portionSize );
const Int colShift = L.ColShift();
const Int LLDim = L.LDim();
const F* LBuffer = L.LockedBuffer();
if( conjugate )
{
for( Int iLoc=0; iLoc<localHeight; ++iLoc )
{
const Int i = colShift + iLoc*commSize;
const Int block = i / blocksize;
const Int jStart = block*blocksize;
const Int b = std::min(height-jStart,blocksize);
for( Int jOff=0; jOff<b; ++jOff )
sendBuffer[iLoc*blocksize+jOff] =
Conj(LBuffer[iLoc+(jStart+jOff)*LLDim]);
}
}
else
{
for( Int iLoc=0; iLoc<localHeight; ++iLoc )
{
const Int i = colShift + iLoc*commSize;
const Int block = i / blocksize;
const Int jStart = block*blocksize;
const Int b = std::min(height-jStart,blocksize);
for( Int jOff=0; jOff<b; ++jOff )
sendBuffer[iLoc*blocksize+jOff] =
LBuffer[iLoc+(jStart+jOff)*LLDim];
}
}
std::vector<F> recvBuffer( portionSize*commSize );
mpi::AllGather
( &sendBuffer[0], portionSize, &recvBuffer[0], portionSize, g.VCComm() );
SwapClear( sendBuffer );
D.Resize( blocksize, height );
F* DBuffer = D.Buffer();
const Int DLDim = D.LDim();
for( Int proc=0; proc<commSize; ++proc )
{
const F* procRecv = &recvBuffer[proc*portionSize];
const Int procLocalHeight = Length(height,proc,commSize);
for( Int iLoc=0; iLoc<procLocalHeight; ++iLoc )
{
const Int i = proc + iLoc*commSize;
for( Int jOff=0; jOff<blocksize; ++jOff )
DBuffer[jOff+i*DLDim] = procRecv[jOff+iLoc*blocksize];
}
}
}
示例11: AssertSameGrids
void ColAllToAllPromote
( const DistMatrix<T, U, V >& A,
DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& B )
{
DEBUG_CSE
AssertSameGrids( A, B );
const Int height = A.Height();
const Int width = A.Width();
B.AlignColsAndResize
( Mod(A.ColAlign(),B.ColStride()), height, width, false, false );
if( !B.Participating() )
return;
const Int colStride = A.ColStride();
const Int colStridePart = A.PartialColStride();
const Int colStrideUnion = A.PartialUnionColStride();
const Int colRankPart = A.PartialColRank();
const Int colDiff = B.ColAlign() - Mod(A.ColAlign(),colStridePart);
const Int maxLocalHeight = MaxLength(height,colStride);
const Int maxLocalWidth = MaxLength(width,colStrideUnion);
const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
if( colDiff == 0 )
{
if( A.PartialUnionColStride() == 1 )
{
Copy( A.LockedMatrix(), B.Matrix() );
}
else
{
vector<T> buffer;
FastResize( buffer, 2*colStrideUnion*portionSize );
T* firstBuf = &buffer[0];
T* secondBuf = &buffer[colStrideUnion*portionSize];
// Pack
util::RowStridedPack
( A.LocalHeight(), width,
B.RowAlign(), colStrideUnion,
A.LockedBuffer(), A.LDim(),
firstBuf, portionSize );
// Simultaneously Gather in columns and Scatter in rows
mpi::AllToAll
( firstBuf, portionSize,
secondBuf, portionSize, A.PartialUnionColComm() );
// Unpack
util::PartialColStridedUnpack
( height, B.LocalWidth(),
A.ColAlign(), colStride,
colStrideUnion, colStridePart, colRankPart,
B.ColShift(),
secondBuf, portionSize,
B.Buffer(), B.LDim() );
}
}
else
{
#ifdef EL_UNALIGNED_WARNINGS
if( A.Grid().Rank() == 0 )
cerr << "Unaligned PartialColAllToAllPromote" << endl;
#endif
const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );
vector<T> buffer;
FastResize( buffer, 2*colStrideUnion*portionSize );
T* firstBuf = &buffer[0];
T* secondBuf = &buffer[colStrideUnion*portionSize];
// Pack
util::RowStridedPack
( A.LocalHeight(), width,
B.RowAlign(), colStrideUnion,
A.LockedBuffer(), A.LDim(),
secondBuf, portionSize );
// Realign the input
mpi::SendRecv
( secondBuf, colStrideUnion*portionSize, sendColRankPart,
firstBuf, colStrideUnion*portionSize, recvColRankPart,
A.PartialColComm() );
// Simultaneously Scatter in columns and Gather in rows
mpi::AllToAll
( firstBuf, portionSize,
secondBuf, portionSize, A.PartialUnionColComm() );
// Unpack
util::PartialColStridedUnpack
( height, B.LocalWidth(),
A.ColAlign(), colStride,
colStrideUnion, colStridePart, recvColRankPart,
B.ColShift(),
secondBuf, portionSize,
B.Buffer(), B.LDim() );
}
//.........这里部分代码省略.........
示例12: cse
//.........这里部分代码省略.........
// necessary processes at each step.
Int requiredMemory = 0;
if( inAGrid )
requiredMemory += maxSendSize;
if( inBGrid )
requiredMemory += maxSendSize;
vector<T> auxBuf( requiredMemory );
Int offset = 0;
T* sendBuf = &auxBuf[offset];
if( inAGrid )
offset += maxSendSize;
T* recvBuf = &auxBuf[offset];
Int recvRow = 0; // avoid compiler warnings...
if( inAGrid )
recvRow = Mod(Mod(A.ColRank()-colAlignA,colStrideA)+colAlign,colStride);
for( Int colSend=0; colSend<numColSends; ++colSend )
{
Int recvCol = 0; // avoid compiler warnings...
if( inAGrid )
recvCol=Mod(Mod(A.RowRank()-rowAlignA,rowStrideA)+rowAlign,
rowStride);
for( Int rowSend=0; rowSend<numRowSends; ++rowSend )
{
mpi::Request sendRequest;
// Fire off this round of non-blocking sends
if( inAGrid )
{
// Pack the data
Int sendHeight = Length(A.LocalHeight(),colSend,numColSends);
Int sendWidth = Length(A.LocalWidth(),rowSend,numRowSends);
copy::util::InterleaveMatrix
( sendHeight, sendWidth,
A.LockedBuffer(colSend,rowSend),
numColSends, numRowSends*A.LDim(),
sendBuf, 1, sendHeight );
// Send data
const Int recvVCRank = recvRow + recvCol*colStride;
const Int recvViewingRank = B.Grid().VCToViewing( recvVCRank );
mpi::ISend
( sendBuf, sendHeight*sendWidth, recvViewingRank,
B.Grid().ViewingComm(), sendRequest );
}
// Perform this round of recv's
if( inBGrid )
{
const Int sendColOffset = colAlignA;
const Int recvColOffset =
(colSend*colStrideA+colAlign) % colStride;
const Int sendRowOffset = rowAlignA;
const Int recvRowOffset =
(rowSend*rowStrideA+rowAlign) % rowStride;
const Int firstSendRow =
Mod( Mod(colRank-recvColOffset,colStride)+sendColOffset,
colStrideA );
const Int firstSendCol =
Mod( Mod(rowRank-recvRowOffset,rowStride)+sendRowOffset,
rowStrideA );
const Int colShift = Mod( colRank-recvColOffset, colStride );
const Int rowShift = Mod( rowRank-recvRowOffset, rowStride );
const Int numColRecvs = Length( colStrideA, colShift, colStride );
const Int numRowRecvs = Length( rowStrideA, rowShift, rowStride );
// Recv data
示例13: entry
// Updates C with a product of A and B on a square process grid.
//
// C is first scaled by beta; then, over g.Height() steps, the local packages
// of A and B are multiplied with Gemm( NORMAL, NORMAL, alpha, ..., T(1), ... )
// into C's local matrix, and between steps the packages are exchanged along
// the grid's row and column communicators.
//
// Raises LogicError (in all builds) if the grid is not square, if C's
// alignments do not match A's column alignment / B's row alignment, or if
// A.Width() is not a multiple of the grid height.  Debug builds additionally
// check that the three matrices share a grid and have conformal dimensions.
inline void
Cannon_NN
( T alpha, const DistMatrix<T>& A,
const DistMatrix<T>& B,
T beta, DistMatrix<T>& C )
{
#ifndef RELEASE
CallStackEntry entry("gemm::Cannon_NN");
if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
LogicError("{A,B,C} must have the same grid");
if( A.Height() != C.Height() ||
B.Width() != C.Width() ||
A.Width() != B.Height() )
{
std::ostringstream msg;
msg << "Nonconformal matrices: \n"
<< " A ~ " << A.Height() << " x " << A.Width() << "\n"
<< " B ~ " << B.Height() << " x " << B.Width() << "\n"
<< " C ~ " << C.Height() << " x " << C.Width() << "\n";
LogicError( msg.str() );
}
#endif
const Grid& g = A.Grid();
if( g.Height() != g.Width() )
LogicError("Process grid must be square for Cannon's");
if( C.ColAlignment() != A.ColAlignment() ||
C.RowAlignment() != B.RowAlignment() )
LogicError("C is not properly aligned");
const Int row = g.Row();
const Int col = g.Col();
// The grid is square at this point, so its height is used as the side length
const Int pSqrt = g.Height();
mpi::Comm rowComm = g.RowComm();
mpi::Comm colComm = g.ColComm();
if( A.Width() % pSqrt != 0 )
LogicError("For now, width(A) must be integer multiple of sqrt(p)");
// Begin by scaling our local portion of C
Scale( beta, C );
// Load the initial A and B packages (may want to transpose B...)
const Int localHeightA = A.LocalHeight();
const Int localHeightB = B.LocalHeight();
const Int localWidthA = A.LocalWidth();
const Int localWidthB = B.LocalWidth();
// The packages use a leading dimension equal to their height, so each one is
// a single contiguous buffer of height*width entries
Matrix<T> pkgA(localHeightA,localWidthA,localHeightA),
pkgB(localHeightB,localWidthB,localHeightB);
for( Int jLoc=0; jLoc<localWidthA; ++jLoc )
MemCopy
( pkgA.Buffer(0,jLoc), A.LockedBuffer(0,jLoc), localHeightA );
for( Int jLoc=0; jLoc<localWidthB; ++jLoc )
MemCopy
( pkgB.Buffer(0,jLoc), B.LockedBuffer(0,jLoc), localHeightB );
// Perform the initial circular shifts so that our A and B packages align
const Int rowShiftA = A.RowShift();
const Int colShiftB = B.ColShift();
const Int leftInitA = (col+pSqrt-colShiftB) % pSqrt;
const Int rightInitA = (col+colShiftB) % pSqrt;
const Int aboveInitB = (row+pSqrt-rowShiftA) % pSqrt;
const Int belowInitB = (row+rowShiftA) % pSqrt;
const Int pkgSizeA = localHeightA*localWidthA;
const Int pkgSizeB = localHeightB*localWidthB;
// NOTE(review): this five-argument SendRecv is presumably an in-place
// exchange (send to the first rank, receive from the second) -- confirm
// against the mpi::SendRecv overload
mpi::SendRecv( pkgA.Buffer(), pkgSizeA, leftInitA, rightInitA, rowComm );
mpi::SendRecv( pkgB.Buffer(), pkgSizeB, aboveInitB, belowInitB, colComm );
// Now begin the data flow
const Int aboveRow = (row+pSqrt-1) % pSqrt;
const Int belowRow = (row+1) % pSqrt;
const Int leftCol = (col+pSqrt-1) % pSqrt;
const Int rightCol = (col+1) % pSqrt;
for( Int q=0; q<pSqrt; ++q )
{
// Accumulate alpha*pkgA*pkgB into the local part of C (beta was applied above)
Gemm( NORMAL, NORMAL, alpha, pkgA, pkgB, T(1), C.Matrix() );
// No exchange is needed after the final multiplication
if( q != pSqrt-1 )
{
mpi::SendRecv
( pkgA.Buffer(), pkgSizeA, leftCol, rightCol, rowComm );
mpi::SendRecv
( pkgB.Buffer(), pkgSizeB, aboveRow, belowRow, colComm );
}
}
}
示例14: cse
void AllGather
( const DistMatrix<T, U, V >& A,
DistMatrix<T,Collect<U>(),Collect<V>()>& B )
{
DEBUG_ONLY(CSE cse("copy::AllGather"))
AssertSameGrids( A, B );
const Int height = A.Height();
const Int width = A.Width();
B.SetGrid( A.Grid() );
B.Resize( height, width );
if( A.Participating() )
{
const Int colStride = A.ColStride();
const Int rowStride = A.RowStride();
const Int distStride = colStride*rowStride;
const Int maxLocalHeight = MaxLength(height,colStride);
const Int maxLocalWidth = MaxLength(width,rowStride);
const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
vector<T> buf( (distStride+1)*portionSize );
T* sendBuf = &buf[0];
T* recvBuf = &buf[portionSize];
// Pack
util::InterleaveMatrix
( A.LocalHeight(), A.LocalWidth(),
A.LockedBuffer(), 1, A.LDim(),
sendBuf, 1, A.LocalHeight() );
// Communicate
mpi::AllGather
( sendBuf, portionSize, recvBuf, portionSize, A.DistComm() );
// Unpack
util::StridedUnpack
( height, width,
A.ColAlign(), colStride,
A.RowAlign(), rowStride,
recvBuf, portionSize,
B.Buffer(), B.LDim() );
}
if( A.Grid().InGrid() && A.CrossComm() != mpi::COMM_SELF )
{
// Pack from the root
const Int BLocalHeight = B.LocalHeight();
const Int BLocalWidth = B.LocalWidth();
vector<T> buf(BLocalHeight*BLocalWidth);
if( A.CrossRank() == A.Root() )
util::InterleaveMatrix
( BLocalHeight, BLocalWidth,
B.LockedBuffer(), 1, B.LDim(),
buf.data(), 1, BLocalHeight );
// Broadcast from the root
mpi::Broadcast
( buf.data(), BLocalHeight*BLocalWidth, A.Root(), A.CrossComm() );
// Unpack if not the root
if( A.CrossRank() != A.Root() )
util::InterleaveMatrix
( BLocalHeight, BLocalWidth,
buf.data(), 1, BLocalHeight,
B.Buffer(), 1, B.LDim() );
}
}
示例15: cse
// Redistributes A, distributed as [Partial<U>(),PartialUnionRow<U,V>()], into
// the [U,V]-distributed matrix B.
//
// B's columns are aligned to A.ColAlign() and B is resized to A's dimensions;
// processes not participating in B return immediately afterwards.  The data
// is packed, exchanged with an AllToAll over B.PartialUnionColComm(), and
// unpacked into B.  When the alignments differ (colDiff != 0), an additional
// SendRecv over B.PartialColComm() realigns the result before unpacking.
void ColAllToAllDemote
( const DistMatrix<T,Partial<U>(),PartialUnionRow<U,V>()>& A,
DistMatrix<T, U, V >& B )
{
DEBUG_ONLY(CallStackEntry cse("copy::ColAllToAllDemote"))
AssertSameGrids( A, B );
const Int height = A.Height();
const Int width = A.Width();
B.AlignColsAndResize( A.ColAlign(), height, width, false, false );
if( !B.Participating() )
return;
const Int colAlign = B.ColAlign();
const Int rowAlignA = A.RowAlign();
const Int colStride = B.ColStride();
const Int colStridePart = B.PartialColStride();
const Int colStrideUnion = B.PartialUnionColStride();
const Int colRankPart = B.PartialColRank();
// Nonzero when B's column alignment, reduced modulo the partial stride,
// differs from A's column alignment
const Int colDiff = (colAlign%colStridePart) - A.ColAlign();
const Int colShiftA = A.ColShift();
const Int localHeightB = B.LocalHeight();
const Int localWidthA = A.LocalWidth();
const Int maxLocalHeight = MaxLength(height,colStride);
const Int maxLocalWidth = MaxLength(width,colStrideUnion);
const Int portionSize = mpi::Pad( maxLocalHeight*maxLocalWidth );
// A single allocation split into two halves of colStrideUnion portions each
std::vector<T> buffer( 2*colStrideUnion*portionSize );
T* firstBuf = &buffer[0];
T* secondBuf = &buffer[colStrideUnion*portionSize];
if( colDiff == 0 )
{
// Pack
util::PartialColStridedPack
( height, localWidthA,
colAlign, colStride,
colStrideUnion, colStridePart, colRankPart,
colShiftA,
A.LockedBuffer(), A.LDim(),
firstBuf, portionSize );
// Simultaneously Scatter in columns and Gather in rows
mpi::AllToAll
( firstBuf, portionSize,
secondBuf, portionSize, B.PartialUnionColComm() );
// Unpack
util::RowStridedUnpack
( localHeightB, width,
rowAlignA, colStrideUnion,
secondBuf, portionSize,
B.Buffer(), B.LDim() );
}
else
{
#ifdef EL_UNALIGNED_WARNINGS
if( B.Grid().Rank() == 0 )
std::cerr << "Unaligned ColAllToAllDemote" << std::endl;
#endif
const Int sendColRankPart = Mod( colRankPart+colDiff, colStridePart );
const Int recvColRankPart = Mod( colRankPart-colDiff, colStridePart );
// Pack (into the second half here, so that the data ends up back in
// secondBuf after the AllToAll and the SendRecv below)
util::PartialColStridedPack
( height, localWidthA,
colAlign, colStride,
colStrideUnion, colStridePart, sendColRankPart,
colShiftA,
A.LockedBuffer(), A.LDim(),
secondBuf, portionSize );
// Simultaneously Scatter in columns and Gather in rows
mpi::AllToAll
( secondBuf, portionSize,
firstBuf, portionSize, B.PartialUnionColComm() );
// Realign the result
mpi::SendRecv
( firstBuf, colStrideUnion*portionSize, sendColRankPart,
secondBuf, colStrideUnion*portionSize, recvColRankPart,
B.PartialColComm() );
// Unpack
util::RowStridedUnpack
( localHeightB, width,
rowAlignA, colStrideUnion,
secondBuf, portionSize,
B.Buffer(), B.LDim() );
}
}