本文整理汇总了C++中DistMatrix::ColAlignment方法的典型用法代码示例。如果您正苦于以下问题:C++ DistMatrix::ColAlignment方法的具体用法?C++ DistMatrix::ColAlignment怎么用?C++ DistMatrix::ColAlignment使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DistMatrix
的用法示例。
在下文中一共展示了DistMatrix::ColAlignment方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: logic_error
inline void
CheckInput
( const DistMatrix<T,MC, STAR>& A,
const DistMatrix<T,STAR,MR >& B,
const DistMatrix<T,MC, MR >& C )
{
if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
throw std::logic_error
("A, B, and C must be distributed over the same grid");
if( A.Height() != C.Height() || B.Width() != C.Width() ||
A.Width() != B.Height() || A.Height() != B.Width() )
{
std::ostringstream msg;
msg << "Nonconformal LocalTrrk: \n"
<< " A[MC,* ] ~ " << A.Height() << " x "
<< A.Width() << "\n"
<< " B[* ,MR] ~ " << B.Height() << " x "
<< B.Width() << "\n"
<< " C[MC,MR] ~ " << C.Height() << " x " << C.Width() << "\n";
throw std::logic_error( msg.str().c_str() );
}
if( A.ColAlignment() != C.ColAlignment() ||
B.RowAlignment() != C.RowAlignment() )
{
std::ostringstream msg;
msg << "Misaligned LocalTrrk: \n"
<< " A[MC,* ] ~ " << A.ColAlignment() << "\n"
<< " B[* ,MR] ~ " << B.RowAlignment() << "\n"
<< " C[MC,MR] ~ " << C.ColAlignment() << " , " <<
C.RowAlignment() << "\n";
throw std::logic_error( msg.str().c_str() );
}
}
示例2: entry
inline F
Reflector( DistMatrix<F>& chi, DistMatrix<F>& x )
{
#ifndef RELEASE
CallStackEntry entry("Reflector");
if( chi.Grid() != x.Grid() )
LogicError("chi and x must be distributed over the same grid");
if( chi.Height() != 1 || chi.Width() != 1 )
LogicError("chi must be a scalar");
if( x.Height() != 1 && x.Width() != 1 )
LogicError("x must be a vector");
#endif
const Grid& g = x.Grid();
F tau;
if( x.Width() == 1 && x.RowAlignment() == chi.RowAlignment() )
{
if( g.Col() == x.RowAlignment() )
tau = reflector::Col( chi, x );
mpi::Broadcast( tau, x.RowAlignment(), g.RowComm() );
}
else
{
if( g.Row() == x.ColAlignment() )
tau = reflector::Row( chi, x );
mpi::Broadcast( tau, x.ColAlignment(), g.ColComm() );
}
return tau;
}
示例3: entry
const DistMatrix<T,STAR,STAR>&
DistMatrix<T,STAR,STAR>::operator=( const DistMatrix<T,VR,STAR>& A )
{
#ifndef RELEASE
CallStackEntry entry("[* ,* ] = [VR,* ]");
this->AssertNotLocked();
this->AssertSameGrid( A.Grid() );
#endif
const elem::Grid& g = this->Grid();
this->ResizeTo( A.Height(), A.Width() );
if( !this->Participating() )
return *this;
const Int p = g.Size();
const Int height = this->Height();
const Int width = this->Width();
const Int localHeightOfA = A.LocalHeight();
const Int maxLocalHeight = MaxLength(height,p);
const Int portionSize = mpi::Pad( maxLocalHeight*width );
T* buffer = this->auxMemory_.Require( (p+1)*portionSize );
T* sendBuf = &buffer[0];
T* recvBuf = &buffer[portionSize];
// Pack
const Int ALDim = A.LDim();
const T* ABuf = A.LockedBuffer();
PARALLEL_FOR
for( Int j=0; j<width; ++j )
MemCopy
( &sendBuf[j*localHeightOfA], &ABuf[j*ALDim], localHeightOfA );
// Communicate
mpi::AllGather
( sendBuf, portionSize,
recvBuf, portionSize, g.VRComm() );
// Unpack
T* thisBuf = this->Buffer();
const Int thisLDim = this->LDim();
const Int colAlignmentOfA = A.ColAlignment();
OUTER_PARALLEL_FOR
for( Int k=0; k<p; ++k )
{
const T* data = &recvBuf[k*portionSize];
const Int colShift = Shift_( k, colAlignmentOfA, p );
const Int localHeight = Length_( height, colShift, p );
INNER_PARALLEL_FOR
for( Int j=0; j<width; ++j )
{
T* destCol = &thisBuf[colShift+j*thisLDim];
const T* sourceCol = &data[j*localHeight];
for( Int iLoc=0; iLoc<localHeight; ++iLoc )
destCol[iLoc*p] = sourceCol[iLoc];
}
}
this->auxMemory_.Release();
return *this;
}
示例4: PushCallStack
inline void
DiagonalScale
( LeftOrRight side, Orientation orientation,
const DistMatrix<typename Base<T>::type,U,V>& d, DistMatrix<T,W,Z>& X )
{
#ifndef RELEASE
PushCallStack("DiagonalScale");
#endif
typedef typename Base<T>::type R;
if( side == LEFT )
{
if( U == W && V == STAR && d.ColAlignment() == X.ColAlignment() )
{
DiagonalScale( LEFT, orientation, d.LockedMatrix(), X.Matrix() );
}
else
{
DistMatrix<R,W,STAR> d_W_STAR( X.Grid() );
d_W_STAR = d;
DiagonalScale
( LEFT, orientation, d_W_STAR.LockedMatrix(), X.Matrix() );
}
}
else
{
if( U == Z && V == STAR && d.ColAlignment() == X.RowAlignment() )
{
DiagonalScale( RIGHT, orientation, d.LockedMatrix(), X.Matrix() );
}
else
{
DistMatrix<R,Z,STAR> d_Z_STAR( X.Grid() );
d_Z_STAR = d;
DiagonalScale
( RIGHT, orientation, d_Z_STAR.LockedMatrix(), X.Matrix() );
}
}
#ifndef RELEASE
PopCallStack();
#endif
}
示例5: PushCallStack
inline void
DistMatrix<T,MD,STAR,Int>::AlignWithDiagonal
( const DistMatrix<S,MR,MC,N>& A, Int offset )
{
#ifndef RELEASE
PushCallStack("[MD,* ]::AlignWithDiagonal([MR,MC])");
this->AssertFreeColAlignment();
this->AssertSameGrid( A );
#endif
const elem::Grid& g = this->Grid();
const Int r = g.Height();
const Int c = g.Width();
const Int lcm = g.LCM();
const Int colAlignment = A.ColAlignment();
const Int rowAlignment = A.RowAlignment();
this->Empty();
Int owner;
if( offset >= 0 )
{
const Int ownerRow = rowAlignment;
const Int ownerCol = (colAlignment + offset) % c;
owner = ownerRow + r*ownerCol;
}
else
{
const Int ownerRow = (rowAlignment-offset) % r;
const Int ownerCol = colAlignment;
owner = ownerRow + r*ownerCol;
}
this->diagPath_ = g.DiagPath(owner);
this->colAlignment_ = g.DiagPathRank(owner);
this->constrainedColAlignment_ = true;
if( this->Participating() )
this->colShift_ = (g.DiagPathRank()+lcm-this->colAlignment_) % lcm;
else
this->colShift_ = 0;
#ifndef RELEASE
PopCallStack();
#endif
}
示例6: PushCallStack
inline void
internal::CholeskyUVar3Square( DistMatrix<F,MC,MR>& A )
{
#ifndef RELEASE
PushCallStack("internal::CholeskyUVar3Square");
if( A.Height() != A.Width() )
throw std::logic_error
("Can only compute Cholesky factor of square matrices.");
if( A.Grid().Height() != A.Grid().Width() )
throw std::logic_error
("CholeskyUVar3Square assumes a square process grid.");
#endif
const Grid& g = A.Grid();
// Find the process holding our transposed data
const int r = g.Height();
int transposeRank;
{
const int colAlignment = A.ColAlignment();
const int rowAlignment = A.RowAlignment();
const int colShift = A.ColShift();
const int rowShift = A.RowShift();
const int transposeRow = (colAlignment+rowShift) % r;
const int transposeCol = (rowAlignment+colShift) % r;
transposeRank = transposeRow + r*transposeCol;
}
const bool onDiagonal = ( transposeRank == g.VCRank() );
// Matrix views
DistMatrix<F,MC,MR>
ATL(g), ATR(g), A00(g), A01(g), A02(g),
ABL(g), ABR(g), A10(g), A11(g), A12(g),
A20(g), A21(g), A22(g);
// Temporary matrix distributions
DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
DistMatrix<F,STAR,VR > A12_STAR_VR(g);
DistMatrix<F,STAR,MC > A12_STAR_MC(g);
DistMatrix<F,STAR,MR > A12_STAR_MR(g);
// Start the algorithm
PartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
while( ABR.Height() > 0 )
{
RepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ A01, A02,
/*************/ /******************/
/**/ A10, /**/ A11, A12,
ABL, /**/ ABR, A20, /**/ A21, A22 );
A12_STAR_MC.AlignWith( A22 );
A12_STAR_MR.AlignWith( A22 );
A12_STAR_VR.AlignWith( A22 );
//--------------------------------------------------------------------//
A11_STAR_STAR = A11;
internal::LocalCholesky( UPPER, A11_STAR_STAR );
A11 = A11_STAR_STAR;
A12_STAR_VR = A12;
internal::LocalTrsm
( LEFT, UPPER, ADJOINT, NON_UNIT, (F)1, A11_STAR_STAR, A12_STAR_VR );
A12_STAR_MR = A12_STAR_VR;
// SendRecv to form A12[* ,MC] from A12[* ,MR]
A12_STAR_MC.ResizeTo( A12.Height(), A12.Width() );
{
if( onDiagonal )
{
const int size = A11.Height()*A22.LocalWidth();
MemCopy
( A12_STAR_MC.LocalBuffer(),
A12_STAR_MR.LocalBuffer(), size );
}
else
{
const int sendSize = A11.Height()*A22.LocalWidth();
const int recvSize = A11.Width()*A22.LocalHeight();
// We know that the ldim is the height since we have manually
// created both temporary matrices.
mpi::SendRecv
( A12_STAR_MR.LocalBuffer(), sendSize, transposeRank, 0,
A12_STAR_MC.LocalBuffer(), recvSize, transposeRank, 0,
g.VCComm() );
}
}
internal::LocalTrrk
( UPPER, ADJOINT, (F)-1, A12_STAR_MC, A12_STAR_MR, (F)1, A22 );
A12 = A12_STAR_MR;
//--------------------------------------------------------------------//
A12_STAR_MC.FreeAlignments();
A12_STAR_MR.FreeAlignments();
A12_STAR_VR.FreeAlignments();
SlidePartitionDownDiagonal
( ATL, /**/ ATR, A00, A01, /**/ A02,
/**/ A10, A11, /**/ A12,
/*************/ /******************/
//.........这里部分代码省略.........
示例7: logic_error
inline void
PanelLU
( DistMatrix<F, STAR,STAR>& A,
DistMatrix<F, MC, STAR>& B,
DistMatrix<int,STAR,STAR>& p,
int pivotOffset )
{
#ifndef RELEASE
PushCallStack("internal::PanelLU");
if( A.Grid() != p.Grid() || p.Grid() != B.Grid() )
throw std::logic_error
("Matrices must be distributed over the same grid");
if( A.Width() != B.Width() )
throw std::logic_error("A and B must be the same width");
if( A.Height() != p.Height() || p.Width() != 1 )
throw std::logic_error("p must be a vector that conforms with A");
#endif
const Grid& g = A.Grid();
const int r = g.Height();
const int colShift = B.ColShift();
const int colAlignment = B.ColAlignment();
// Matrix views
DistMatrix<F,STAR,STAR>
ATL(g), ATR(g), A00(g), a01(g), A02(g),
ABL(g), ABR(g), a10(g), alpha11(g), a12(g),
A20(g), a21(g), A22(g);
DistMatrix<F,MC,STAR>
BL(g), BR(g),
B0(g), b1(g), B2(g);
const int width = A.Width();
const int numBytes = (width+1)*sizeof(F)+sizeof(int);
std::vector<byte> sendData(numBytes);
std::vector<byte> recvData(numBytes);
// Extract pointers to send and recv data
// TODO: Think of how to make this safer with respect to alignment issues
F* sendBufFloat = (F*)&sendData[0];
F* recvBufFloat = (F*)&recvData[0];
int* sendBufInt = (int*)&sendData[(width+1)*sizeof(F)];
int* recvBufInt = (int*)&recvData[(width+1)*sizeof(F)];
// Start the algorithm
PushBlocksizeStack( 1 );
PartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
PartitionRight( B, BL, BR, 0 );
while( ATL.Height() < A.Height() )
{
RepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ a01, A02,
/*************/ /**********************/
/**/ a10, /**/ alpha11, a12,
ABL, /**/ ABR, A20, /**/ a21, A22 );
RepartitionRight
( BL, /**/ BR,
B0, /**/ b1, B2 );
//--------------------------------------------------------------------//
const int currentRow = a01.Height();
// Store the index/value of the pivot candidate in A
F pivot = alpha11.GetLocal(0,0);
int pivotRow = currentRow;
for( int i=0; i<a21.Height(); ++i )
{
F value = a21.GetLocal(i,0);
if( FastAbs(value) > FastAbs(pivot) )
{
pivot = value;
pivotRow = currentRow + i + 1;
}
}
// Update the pivot candidate to include local data from B
for( int i=0; i<B.LocalHeight(); ++i )
{
F value = b1.GetLocal(i,0);
if( FastAbs(value) > FastAbs(pivot) )
{
pivot = value;
pivotRow = A.Height() + colShift + i*r;
}
}
// Fill the send buffer with:
// [ pivotValue | pivot row data | pivotRow ]
if( pivotRow < A.Height() )
{
sendBufFloat[0] = A.GetLocal(pivotRow,a10.Width());
const int ALDim = A.LocalLDim();
const F* ABuffer = A.LocalBuffer(pivotRow,0);
for( int j=0; j<width; ++j )
sendBufFloat[j+1] = ABuffer[j*ALDim];
}
//.........这里部分代码省略.........
示例8: logic_error
inline void
Var3( Orientation orientation, DistMatrix<F>& A, DistMatrix<F,MC,STAR>& d )
{
#ifndef RELEASE
PushCallStack("ldl::Var3");
if( orientation == NORMAL )
throw std::logic_error("Can only perform LDL^T and LDL^H");
if( A.Height() != A.Width() )
throw std::logic_error("A must be square");
if( A.Grid() != d.Grid() )
throw std::logic_error("A and d must use the same grid");
if( d.Viewing() && (d.Height() != A.Height() || d.Width() != 1) )
throw std::logic_error
("d must be a column vector of the same height as A");
if( d.Viewing() && d.ColAlignment() != A.ColAlignment() )
throw std::logic_error("d must be aligned with A");
#endif
const Grid& g = A.Grid();
if( !d.Viewing() )
{
d.AlignWith( A );
d.ResizeTo( A.Height(), 1 );
}
// Matrix views
DistMatrix<F>
ATL(g), ATR(g), A00(g), A01(g), A02(g),
ABL(g), ABR(g), A10(g), A11(g), A12(g),
A20(g), A21(g), A22(g);
DistMatrix<F,MC,STAR>
dT(g), d0(g),
dB(g), d1(g),
d2(g);
// Temporary matrices
DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
DistMatrix<F,STAR,STAR> d1_STAR_STAR(g);
DistMatrix<F,VC, STAR> A21_VC_STAR(g);
DistMatrix<F,VR, STAR> A21_VR_STAR(g);
DistMatrix<F,STAR,MC > S21Trans_STAR_MC(g);
DistMatrix<F,STAR,MR > A21AdjOrTrans_STAR_MR(g);
const bool conjugate = ( orientation == ADJOINT );
// Start the algorithm
PartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
PartitionDown
( d, dT,
dB, 0 );
while( ABR.Height() > 0 )
{
RepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ A01, A02,
/*************/ /******************/
/**/ A10, /**/ A11, A12,
ABL, /**/ ABR, A20, /**/ A21, A22 );
RepartitionDown
( dT, d0,
/**/ /**/
d1,
dB, d2 );
A21_VC_STAR.AlignWith( A22 );
A21_VR_STAR.AlignWith( A22 );
S21Trans_STAR_MC.AlignWith( A22 );
A21AdjOrTrans_STAR_MR.AlignWith( A22 );
//--------------------------------------------------------------------//
A11_STAR_STAR = A11;
LocalLDL( orientation, A11_STAR_STAR, d1_STAR_STAR );
A11 = A11_STAR_STAR;
d1 = d1_STAR_STAR;
A21_VC_STAR = A21;
LocalTrsm
( RIGHT, LOWER, orientation, UNIT,
F(1), A11_STAR_STAR, A21_VC_STAR );
S21Trans_STAR_MC.TransposeFrom( A21_VC_STAR );
DiagonalSolve( RIGHT, NORMAL, d1_STAR_STAR, A21_VC_STAR );
A21_VR_STAR = A21_VC_STAR;
A21AdjOrTrans_STAR_MR.TransposeFrom( A21_VR_STAR, conjugate );
LocalTrrk
( LOWER, TRANSPOSE,
F(-1), S21Trans_STAR_MC, A21AdjOrTrans_STAR_MR, F(1), A22 );
A21 = A21_VC_STAR;
//--------------------------------------------------------------------//
A21_VC_STAR.FreeAlignments();
A21_VR_STAR.FreeAlignments();
S21Trans_STAR_MC.FreeAlignments();
A21AdjOrTrans_STAR_MR.FreeAlignments();
SlidePartitionDown
( dT, d0,
d1,
/**/ /**/
dB, d2 );
//.........这里部分代码省略.........
示例9: entry
inline void
LocalTrmmAccumulateRUN
( Orientation orientation, UnitOrNonUnit diag, T alpha,
const DistMatrix<T,MC, MR >& U,
const DistMatrix<T,STAR,MC >& X_STAR_MC,
DistMatrix<T,MR, STAR>& ZTrans_MR_STAR )
{
#ifndef RELEASE
CallStackEntry entry("internal::LocalTrmmAccumulateRUN");
if( U.Grid() != X_STAR_MC.Grid() ||
X_STAR_MC.Grid() != ZTrans_MR_STAR.Grid() )
throw std::logic_error
("{U,X,Z} must be distributed over the same grid");
if( U.Height() != U.Width() ||
U.Height() != X_STAR_MC.Width() ||
U.Height() != ZTrans_MR_STAR.Height() )
{
std::ostringstream msg;
msg << "Nonconformal LocalTrmmAccumulateRUN: \n"
<< " U ~ " << U.Height() << " x " << U.Width() << "\n"
<< " X[* ,MC] ~ " << X_STAR_MC.Height() << " x "
<< X_STAR_MC.Width() << "\n"
<< " Z^H/T[MR,* ] ~ " << ZTrans_MR_STAR.Height() << " x "
<< ZTrans_MR_STAR.Width() << "\n";
throw std::logic_error( msg.str().c_str() );
}
if( X_STAR_MC.RowAlignment() != U.ColAlignment() ||
ZTrans_MR_STAR.ColAlignment() != U.RowAlignment() )
throw std::logic_error("Partial matrix distributions are misaligned");
#endif
const Grid& g = U.Grid();
// Matrix views
DistMatrix<T>
UTL(g), UTR(g), U00(g), U01(g), U02(g),
UBL(g), UBR(g), U10(g), U11(g), U12(g),
U20(g), U21(g), U22(g);
DistMatrix<T> D11(g);
DistMatrix<T,STAR,MC>
XL_STAR_MC(g), XR_STAR_MC(g),
X0_STAR_MC(g), X1_STAR_MC(g), X2_STAR_MC(g);
DistMatrix<T,MR,STAR>
ZTTrans_MR_STAR(g), Z0Trans_MR_STAR(g),
ZBTrans_MR_STAR(g), Z1Trans_MR_STAR(g),
Z2Trans_MR_STAR(g);
const int ratio = std::max( g.Height(), g.Width() );
PushBlocksizeStack( ratio*Blocksize() );
LockedPartitionDownDiagonal
( U, UTL, UTR,
UBL, UBR, 0 );
LockedPartitionRight( X_STAR_MC, XL_STAR_MC, XR_STAR_MC, 0 );
PartitionDown
( ZTrans_MR_STAR, ZTTrans_MR_STAR,
ZBTrans_MR_STAR, 0 );
while( UTL.Height() < U.Height() )
{
LockedRepartitionDownDiagonal
( UTL, /**/ UTR, U00, /**/ U01, U02,
/*************/ /******************/
/**/ U10, /**/ U11, U12,
UBL, /**/ UBR, U20, /**/ U21, U22 );
LockedRepartitionRight
( XL_STAR_MC, /**/ XR_STAR_MC,
X0_STAR_MC, /**/ X1_STAR_MC, X2_STAR_MC );
RepartitionDown
( ZTTrans_MR_STAR, Z0Trans_MR_STAR,
/***************/ /***************/
Z1Trans_MR_STAR,
ZBTrans_MR_STAR, Z2Trans_MR_STAR );
D11.AlignWith( U11 );
//--------------------------------------------------------------------//
D11 = U11;
MakeTriangular( UPPER, D11 );
if( diag == UNIT )
SetDiagonal( D11, T(1) );
LocalGemm
( orientation, orientation,
alpha, D11, X1_STAR_MC, T(1), Z1Trans_MR_STAR );
LocalGemm
( orientation, orientation,
alpha, U01, X0_STAR_MC, T(1), Z1Trans_MR_STAR );
//--------------------------------------------------------------------//
D11.FreeAlignments();
SlideLockedPartitionDownDiagonal
( UTL, /**/ UTR, U00, U01, /**/ U02,
/**/ U10, U11, /**/ U12,
/*************/ /******************/
UBL, /**/ UBR, U20, U21, /**/ U22 );
SlideLockedPartitionRight
( XL_STAR_MC, /**/ XR_STAR_MC,
//.........这里部分代码省略.........
示例10: entry
inline void
Cannon_NN
( T alpha, const DistMatrix<T>& A,
const DistMatrix<T>& B,
T beta, DistMatrix<T>& C )
{
#ifndef RELEASE
CallStackEntry entry("gemm::Cannon_NN");
if( A.Grid() != B.Grid() || B.Grid() != C.Grid() )
LogicError("{A,B,C} must have the same grid");
if( A.Height() != C.Height() ||
B.Width() != C.Width() ||
A.Width() != B.Height() )
{
std::ostringstream msg;
msg << "Nonconformal matrices: \n"
<< " A ~ " << A.Height() << " x " << A.Width() << "\n"
<< " B ~ " << B.Height() << " x " << B.Width() << "\n"
<< " C ~ " << C.Height() << " x " << C.Width() << "\n";
LogicError( msg.str() );
}
#endif
const Grid& g = A.Grid();
if( g.Height() != g.Width() )
LogicError("Process grid must be square for Cannon's");
if( C.ColAlignment() != A.ColAlignment() ||
C.RowAlignment() != B.RowAlignment() )
LogicError("C is not properly aligned");
const Int row = g.Row();
const Int col = g.Col();
const Int pSqrt = g.Height();
mpi::Comm rowComm = g.RowComm();
mpi::Comm colComm = g.ColComm();
if( A.Width() % pSqrt != 0 )
LogicError("For now, width(A) must be integer multiple of sqrt(p)");
// Begin by scaling our local portion of C
Scale( beta, C );
// Load the initial A and B packages (may want to transpose B...)
const Int localHeightA = A.LocalHeight();
const Int localHeightB = B.LocalHeight();
const Int localWidthA = A.LocalWidth();
const Int localWidthB = B.LocalWidth();
Matrix<T> pkgA(localHeightA,localWidthA,localHeightA),
pkgB(localHeightB,localWidthB,localHeightB);
for( Int jLoc=0; jLoc<localWidthA; ++jLoc )
MemCopy
( pkgA.Buffer(0,jLoc), A.LockedBuffer(0,jLoc), localHeightA );
for( Int jLoc=0; jLoc<localWidthB; ++jLoc )
MemCopy
( pkgB.Buffer(0,jLoc), B.LockedBuffer(0,jLoc), localHeightB );
// Perform the initial circular shifts so that our A and B packages align
const Int rowShiftA = A.RowShift();
const Int colShiftB = B.ColShift();
const Int leftInitA = (col+pSqrt-colShiftB) % pSqrt;
const Int rightInitA = (col+colShiftB) % pSqrt;
const Int aboveInitB = (row+pSqrt-rowShiftA) % pSqrt;
const Int belowInitB = (row+rowShiftA) % pSqrt;
const Int pkgSizeA = localHeightA*localWidthA;
const Int pkgSizeB = localHeightB*localWidthB;
mpi::SendRecv( pkgA.Buffer(), pkgSizeA, leftInitA, rightInitA, rowComm );
mpi::SendRecv( pkgB.Buffer(), pkgSizeB, aboveInitB, belowInitB, colComm );
// Now begin the data flow
const Int aboveRow = (row+pSqrt-1) % pSqrt;
const Int belowRow = (row+1) % pSqrt;
const Int leftCol = (col+pSqrt-1) % pSqrt;
const Int rightCol = (col+1) % pSqrt;
for( Int q=0; q<pSqrt; ++q )
{
Gemm( NORMAL, NORMAL, alpha, pkgA, pkgB, T(1), C.Matrix() );
if( q != pSqrt-1 )
{
mpi::SendRecv
( pkgA.Buffer(), pkgSizeA, leftCol, rightCol, rowComm );
mpi::SendRecv
( pkgB.Buffer(), pkgSizeB, aboveRow, belowRow, colComm );
}
}
}
示例11: entry
inline void
LocalSymvColAccumulateU
( T alpha,
const DistMatrix<T>& A,
const DistMatrix<T,MC,STAR>& x_MC_STAR,
const DistMatrix<T,MR,STAR>& x_MR_STAR,
DistMatrix<T,MC,STAR>& z_MC_STAR,
DistMatrix<T,MR,STAR>& z_MR_STAR,
bool conjugate=false )
{
#ifndef RELEASE
CallStackEntry entry("internal::LocalSymvColAccumulateU");
if( A.Grid() != x_MC_STAR.Grid() ||
x_MC_STAR.Grid() != x_MR_STAR.Grid() ||
x_MR_STAR.Grid() != z_MC_STAR.Grid() ||
z_MC_STAR.Grid() != z_MR_STAR.Grid() )
LogicError
("{A,x,z} must be distributed over the same grid");
if( x_MC_STAR.Width() != 1 || x_MR_STAR.Width() != 1 ||
z_MC_STAR.Width() != 1 || z_MR_STAR.Width() != 1 )
LogicError("Expected x and z to be column vectors");
if( A.Height() != A.Width() ||
A.Height() != x_MC_STAR.Height() ||
A.Height() != x_MR_STAR.Height() ||
A.Height() != z_MC_STAR.Height() ||
A.Height() != z_MR_STAR.Height() )
{
std::ostringstream msg;
msg << "Nonconformal LocalSymvColAccumulateU: \n"
<< " A ~ " << A.Height() << " x " << A.Width() << "\n"
<< " x[MC,* ] ~ " << x_MC_STAR.Height() << " x "
<< x_MC_STAR.Width() << "\n"
<< " x[MR,* ] ~ " << x_MR_STAR.Height() << " x "
<< x_MR_STAR.Width() << "\n"
<< " z[MC,* ] ~ " << z_MC_STAR.Height() << " x "
<< z_MC_STAR.Width() << "\n"
<< " z[MR,* ] ~ " << z_MR_STAR.Height() << " x "
<< z_MR_STAR.Width() << "\n";
LogicError( msg.str() );
}
if( x_MC_STAR.ColAlignment() != A.ColAlignment() ||
x_MR_STAR.ColAlignment() != A.RowAlignment() ||
z_MC_STAR.ColAlignment() != A.ColAlignment() ||
z_MR_STAR.ColAlignment() != A.RowAlignment() )
LogicError("Partial matrix distributions are misaligned");
#endif
const Grid& g = A.Grid();
const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );
// Matrix views
DistMatrix<T> A11(g), A12(g);
DistMatrix<T> D11(g);
DistMatrix<T,MC,STAR> x1_MC_STAR(g);
DistMatrix<T,MR,STAR>
xT_MR_STAR(g), x0_MR_STAR(g),
xB_MR_STAR(g), x1_MR_STAR(g),
x2_MR_STAR(g);
DistMatrix<T,MC,STAR> z1_MC_STAR(g);
DistMatrix<T,MR,STAR> z1_MR_STAR(g),
z2_MR_STAR(g);
// We want our local gemvs to be of width blocksize, so we will
// temporarily change to max(r,c) times the current blocksize
const Int ratio = Max( g.Height(), g.Width() );
PushBlocksizeStack( ratio*LocalSymvBlocksize<T>() );
LockedPartitionDown
( x_MR_STAR, xT_MR_STAR,
xB_MR_STAR, 0 );
while( xT_MR_STAR.Height() < x_MR_STAR.Height() )
{
LockedRepartitionDown
( xT_MR_STAR, x0_MR_STAR,
/**********/ /**********/
x1_MR_STAR,
xB_MR_STAR, x2_MR_STAR );
const Int n0 = x0_MR_STAR.Height();
const Int n1 = x1_MR_STAR.Height();
const Int n2 = x2_MR_STAR.Height();
LockedView( A11, A, n0, n0, n1, n1 );
LockedView( A12, A, n0, n0+n1, n1, n2 );
LockedView( x1_MC_STAR, x_MC_STAR, n0, 0, n1, 1 );
View( z1_MC_STAR, z_MC_STAR, n0, 0, n1, 1 );
View( z1_MR_STAR, z_MR_STAR, n0, 0, n1, 1 );
View( z2_MR_STAR, z_MR_STAR, n0+n1, 0, n2, 1 );
D11.AlignWith( A11 );
//--------------------------------------------------------------------//
// TODO: These diagonal block updates can be greatly improved
D11 = A11;
MakeTriangular( UPPER, D11 );
LocalGemv( NORMAL, alpha, D11, x1_MR_STAR, T(1), z1_MC_STAR );
SetDiagonal( D11, T(0) );
LocalGemv( orientation, alpha, D11, x1_MC_STAR, T(1), z1_MR_STAR );
LocalGemv( NORMAL, alpha, A12, x2_MR_STAR, T(1), z1_MC_STAR );
LocalGemv( orientation, alpha, A12, x1_MC_STAR, T(1), z2_MR_STAR );
//--------------------------------------------------------------------//
//.........这里部分代码省略.........
示例12: logic_error
inline T
DotuHelper( const DistMatrix<T,U,V>& x, const DistMatrix<T,MC,MR>& y )
{
#ifndef RELEASE
PushCallStack("internal::DotuHelper");
if( x.Grid() != y.Grid() )
throw std::logic_error("{x,y} must be distributed over the same grid");
if( (x.Height() != 1 && x.Width() != 1) ||
(y.Height() != 1 && y.Width() != 1) )
throw std::logic_error("Dotu requires x and y to be vectors");
int xLength = ( x.Width() == 1 ? x.Height() : x.Width() );
int yLength = ( y.Width() == 1 ? y.Height() : y.Width() );
if( xLength != yLength )
throw std::logic_error("Dotu requires x and y to be the same length");
#endif
const Grid& g = x.Grid();
T globalDotu;
if( x.Width() == 1 && y.Width() == 1 )
{
DistMatrix<T,MC,MR> xRedist(g);
xRedist.AlignWith( y );
xRedist = x;
int ownerCol = y.RowAlignment();
if( g.Col() == ownerCol )
{
T localDotu =
Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.ColComm() );
}
mpi::Broadcast( &globalDotu, 1, ownerCol, g.RowComm() );
}
else if( x.Width() == 1 )
{
DistMatrix<T,MR,MC> xRedist(g);
xRedist.AlignWith( y );
xRedist = x;
int ownerRow = y.ColAlignment();
if( g.Row() == ownerRow )
{
T localDotu =
Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.RowComm() );
}
mpi::Broadcast( &globalDotu, 1, ownerRow, g.ColComm() );
}
else if( y.Width() == 1 )
{
DistMatrix<T,MR,MC> xRedist(g);
xRedist.AlignWith( y );
xRedist = x;
int ownerCol = y.RowAlignment();
if( g.Col() == ownerCol )
{
T localDotu =
Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.ColComm() );
}
mpi::Broadcast( &globalDotu, 1, ownerCol, g.RowComm() );
}
else
{
DistMatrix<T,MC,MR> xRedist(g);
xRedist.AlignWith( y );
xRedist = x;
int ownerRow = y.ColAlignment();
if( g.Row() == ownerRow )
{
T localDotu =
Dotu( xRedist.LockedLocalMatrix(), y.LockedLocalMatrix() );
mpi::AllReduce( &localDotu, &globalDotu, 1, mpi::SUM, g.RowComm() );
}
mpi::Broadcast( &globalDotu, 1, ownerRow, g.ColComm() );
}
#ifndef RELEASE
PopCallStack();
#endif
return globalDotu;
}
示例13: logic_error
inline void
LocalSymmetricAccumulateLU
( Orientation orientation, T alpha,
const DistMatrix<T>& A,
const DistMatrix<T,MC, STAR>& B_MC_STAR,
const DistMatrix<T,STAR,MR >& BAdjOrTrans_STAR_MR,
DistMatrix<T,MC, STAR>& Z_MC_STAR,
DistMatrix<T,MR, STAR>& Z_MR_STAR )
{
#ifndef RELEASE
PushCallStack("internal::LocalSymmetricAccumulateLU");
if( A.Grid() != B_MC_STAR.Grid() ||
B_MC_STAR.Grid() != BAdjOrTrans_STAR_MR.Grid() ||
BAdjOrTrans_STAR_MR.Grid() != Z_MC_STAR.Grid() ||
Z_MC_STAR.Grid() != Z_MR_STAR.Grid() )
throw std::logic_error
("{A,B,Z} must be distributed over the same grid");
if( A.Height() != A.Width() ||
A.Height() != B_MC_STAR.Height() ||
A.Height() != BAdjOrTrans_STAR_MR.Width() ||
A.Height() != Z_MC_STAR.Height() ||
A.Height() != Z_MR_STAR.Height() ||
B_MC_STAR.Width() != BAdjOrTrans_STAR_MR.Height() ||
BAdjOrTrans_STAR_MR.Height() != Z_MC_STAR.Width() ||
Z_MC_STAR.Width() != Z_MR_STAR.Width() )
{
std::ostringstream msg;
msg << "Nonconformal LocalSymmetricAccumulateLU: \n"
<< " A ~ " << A.Height() << " x " << A.Width() << "\n"
<< " B[MC,* ] ~ " << B_MC_STAR.Height() << " x "
<< B_MC_STAR.Width() << "\n"
<< " B^H/T[* ,MR] ~ " << BAdjOrTrans_STAR_MR.Height() << " x "
<< BAdjOrTrans_STAR_MR.Width() << "\n"
<< " Z[MC,* ] ~ " << Z_MC_STAR.Height() << " x "
<< Z_MC_STAR.Width() << "\n"
<< " Z[MR,* ] ` " << Z_MR_STAR.Height() << " x "
<< Z_MR_STAR.Width() << "\n";
throw std::logic_error( msg.str().c_str() );
}
if( B_MC_STAR.ColAlignment() != A.ColAlignment() ||
BAdjOrTrans_STAR_MR.RowAlignment() != A.RowAlignment() ||
Z_MC_STAR.ColAlignment() != A.ColAlignment() ||
Z_MR_STAR.ColAlignment() != A.RowAlignment() )
throw std::logic_error("Partial matrix distributions are misaligned");
#endif
const Grid& g = A.Grid();
DistMatrix<T>
ATL(g), ATR(g), A00(g), A01(g), A02(g),
ABL(g), ABR(g), A10(g), A11(g), A12(g),
A20(g), A21(g), A22(g);
DistMatrix<T> D11(g);
DistMatrix<T,MC,STAR>
BT_MC_STAR(g), B0_MC_STAR(g),
BB_MC_STAR(g), B1_MC_STAR(g),
B2_MC_STAR(g);
DistMatrix<T,STAR,MR>
BLAdjOrTrans_STAR_MR(g), BRAdjOrTrans_STAR_MR(g),
B0AdjOrTrans_STAR_MR(g), B1AdjOrTrans_STAR_MR(g),
B2AdjOrTrans_STAR_MR(g);
DistMatrix<T,MC,STAR>
ZT_MC_STAR(g), Z0_MC_STAR(g),
ZB_MC_STAR(g), Z1_MC_STAR(g),
Z2_MC_STAR(g);
DistMatrix<T,MR,STAR>
ZT_MR_STAR(g), Z0_MR_STAR(g),
ZB_MR_STAR(g), Z1_MR_STAR(g),
Z2_MR_STAR(g);
const int ratio = std::max( g.Height(), g.Width() );
PushBlocksizeStack( ratio*Blocksize() );
LockedPartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
LockedPartitionDown
( B_MC_STAR, BT_MC_STAR,
BB_MC_STAR, 0 );
LockedPartitionRight
( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 );
PartitionDown
( Z_MC_STAR, ZT_MC_STAR,
ZB_MC_STAR, 0 );
PartitionDown
( Z_MR_STAR, ZT_MR_STAR,
ZB_MR_STAR, 0 );
while( ATL.Height() < A.Height() )
{
LockedRepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ A01, A02,
/************/ /******************/
/**/ A10, /**/ A11, A12,
ABL, /**/ ABR, A20, /**/ A21, A22 );
LockedRepartitionDown
//.........这里部分代码省略.........
示例14: logic_error
inline void
TrsmLUNSmall
( UnitOrNonUnit diag,
F alpha, const DistMatrix<F,VC,STAR>& U, DistMatrix<F,VC,STAR>& X,
bool checkIfSingular )
{
#ifndef RELEASE
PushCallStack("internal::TrsmLUNSmall");
if( U.Grid() != X.Grid() )
throw std::logic_error
("U and X must be distributed over the same grid");
if( U.Height() != U.Width() || U.Width() != X.Height() )
{
std::ostringstream msg;
msg << "Nonconformal TrsmLUN: \n"
<< " U ~ " << U.Height() << " x " << U.Width() << "\n"
<< " X ~ " << X.Height() << " x " << X.Width() << "\n";
throw std::logic_error( msg.str() );
}
if( U.ColAlignment() != X.ColAlignment() )
throw std::logic_error("U and X are assumed to be aligned");
#endif
const Grid& g = U.Grid();
// Matrix views
DistMatrix<F,VC,STAR>
UTL(g), UTR(g), U00(g), U01(g), U02(g),
UBL(g), UBR(g), U10(g), U11(g), U12(g),
U20(g), U21(g), U22(g);
DistMatrix<F,VC,STAR> XT(g), X0(g),
XB(g), X1(g),
X2(g);
// Temporary distributions
DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);
// Start the algorithm
Scale( alpha, X );
LockedPartitionUpDiagonal
( U, UTL, UTR,
UBL, UBR, 0 );
PartitionUp
( X, XT,
XB, 0 );
while( XT.Height() > 0 )
{
LockedRepartitionUpDiagonal
( UTL, /**/ UTR, U00, U01, /**/ U02,
/**/ U10, U11, /**/ U12,
/*************/ /******************/
UBL, /**/ UBR, U20, U21, /**/ U22 );
RepartitionUp
( XT, X0,
X1,
/**/ /**/
XB, X2 );
//--------------------------------------------------------------------//
U11_STAR_STAR = U11; // U11[* ,* ] <- U11[VC,* ]
X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VC,* ]
// X1[* ,* ] := U11^-1[* ,* ] X1[* ,* ]
LocalTrsm
( LEFT, UPPER, NORMAL, diag,
F(1), U11_STAR_STAR, X1_STAR_STAR, checkIfSingular );
X1 = X1_STAR_STAR;
// X0[VC,* ] -= U01[VC,* ] X1[* ,* ]
LocalGemm( NORMAL, NORMAL, F(-1), U01, X1_STAR_STAR, F(1), X0 );
//--------------------------------------------------------------------//
SlideLockedPartitionUpDiagonal
( UTL, /**/ UTR, U00, /**/ U01, U02,
/*************/ /******************/
/**/ U10, /**/ U11, U12,
UBL, /**/ UBR, U20, /**/ U21, U22 );
SlidePartitionUp
( XT, X0,
/**/ /**/
X1,
XB, X2 );
}
#ifndef RELEASE
PopCallStack();
#endif
}
示例15: logic_error
inline void
ApplyRowPivots
( DistMatrix<F>& A,
const std::vector<int>& image,
const std::vector<int>& preimage )
{
const int b = image.size();
#ifndef RELEASE
PushCallStack("ApplyRowPivots");
if( A.Height() < b || b != (int)preimage.size() )
throw std::logic_error
("image and preimage must be vectors of equal length that are not "
"taller than A.");
#endif
const int localWidth = A.LocalWidth();
if( A.Height() == 0 || A.Width() == 0 )
{
#ifndef RELEASE
PopCallStack();
#endif
return;
}
// Extract the relevant process grid information
const Grid& g = A.Grid();
const int r = g.Height();
const int colAlignment = A.ColAlignment();
const int colShift = A.ColShift();
const int myRow = g.Row();
// Extract the send and recv counts from the image and preimage.
// This process's sends may be logically partitioned into two sets:
// (a) sends from rows [0,...,b-1]
// (b) sends from rows [b,...]
// The latter is analyzed with image, the former deduced with preimage.
std::vector<int> sendCounts(r,0), recvCounts(r,0);
for( int i=colShift; i<b; i+=r )
{
const int sendRow = preimage[i];
const int sendTo = (colAlignment+sendRow) % r;
sendCounts[sendTo] += localWidth;
const int recvRow = image[i];
const int recvFrom = (colAlignment+recvRow) % r;
recvCounts[recvFrom] += localWidth;
}
for( int i=0; i<b; ++i )
{
const int sendRow = preimage[i];
if( sendRow >= b )
{
const int sendTo = (colAlignment+sendRow) % r;
if( sendTo == myRow )
{
const int sendFrom = (colAlignment+i) % r;
recvCounts[sendFrom] += localWidth;
}
}
const int recvRow = image[i];
if( recvRow >= b )
{
const int recvFrom = (colAlignment+recvRow) % r;
if( recvFrom == myRow )
{
const int recvTo = (colAlignment+i) % r;
sendCounts[recvTo] += localWidth;
}
}
}
// Construct the send and recv displacements from the counts
std::vector<int> sendDispls(r), recvDispls(r);
int totalSend=0, totalRecv=0;
for( int i=0; i<r; ++i )
{
sendDispls[i] = totalSend;
recvDispls[i] = totalRecv;
totalSend += sendCounts[i];
totalRecv += recvCounts[i];
}
#ifndef RELEASE
if( totalSend != totalRecv )
{
std::ostringstream msg;
msg << "Send and recv counts do not match: (send,recv)="
<< totalSend << "," << totalRecv;
throw std::logic_error( msg.str().c_str() );
}
#endif
// Fill vectors with the send data
const int ALDim = A.LocalLDim();
std::vector<F> sendData(std::max(1,totalSend));
std::vector<int> offsets(r,0);
const int localHeight = LocalLength( b, colShift, r );
for( int iLocal=0; iLocal<localHeight; ++iLocal )
{
const int sendRow = preimage[colShift+iLocal*r];
const int sendTo = (colAlignment+sendRow) % r;
//.........这里部分代码省略.........