本文整理汇总了C++中LocalGemm函数的典型用法代码示例。如果您正苦于以下问题:C++ LocalGemm函数的具体用法?C++ LocalGemm怎么用?C++ LocalGemm使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LocalGemm函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Blocksize
void LUNMedium
( const AbstractDistMatrix<F>& UPre, AbstractDistMatrix<F>& XPre,
bool checkIfSingular )
{
DEBUG_CSE
const Int m = XPre.Height();
const Int bsize = Blocksize();
const Grid& g = UPre.Grid();
DistMatrixReadProxy<F,F,MC,MR> UProx( UPre );
DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
auto& U = UProx.GetLocked();
auto& X = XProx.Get();
DistMatrix<F,MC, STAR> U01_MC_STAR(g);
DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g);
const Int kLast = LastOffset( m, bsize );
Int k=kLast, kOld=m;
while( true )
{
const bool in2x2 = ( k>0 && U.Get(k,k-1) != F(0) );
if( in2x2 )
--k;
const Int nb = kOld-k;
const Range<Int> ind0( 0, k ),
ind1( k, k+nb );
auto U01 = U( ind0, ind1 );
auto U11 = U( ind1, ind1 );
auto X0 = X( ind0, ALL );
auto X1 = X( ind1, ALL );
U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR]
X1Trans_MR_STAR.AlignWith( X0 );
Transpose( X1, X1Trans_MR_STAR );
// X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ]
// = (U11^-1[* ,* ] X1[* ,MR])^T
LocalQuasiTrsm
( RIGHT, UPPER, TRANSPOSE,
F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );
Transpose( X1Trans_MR_STAR, X1 );
U01_MC_STAR.AlignWith( X0 );
U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR]
// X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
LocalGemm
( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 );
if( k == 0 )
break;
kOld = k;
k -= Min(bsize,k);
}
}
示例2: Blocksize
void LUNMedium
( UnitOrNonUnit diag,
const AbstractDistMatrix<F>& UPre,
AbstractDistMatrix<F>& XPre,
bool checkIfSingular )
{
EL_DEBUG_CSE
const Int m = XPre.Height();
const Int bsize = Blocksize();
const Grid& g = UPre.Grid();
DistMatrixReadProxy<F,F,MC,MR> UProx( UPre );
DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
auto& U = UProx.GetLocked();
auto& X = XProx.Get();
DistMatrix<F,MC, STAR> U01_MC_STAR(g);
DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g);
const Int kLast = LastOffset( m, bsize );
for( Int k=kLast; k>=0; k-=bsize )
{
const Int nb = Min(bsize,m-k);
const Range<Int> ind0( 0, k ),
ind1( k, k+nb );
auto U01 = U( ind0, ind1 );
auto U11 = U( ind1, ind1 );
auto X0 = X( ind0, ALL );
auto X1 = X( ind1, ALL );
U11_STAR_STAR = U11; // U11[* ,* ] <- U11[MC,MR]
X1Trans_MR_STAR.AlignWith( X0 );
Transpose( X1, X1Trans_MR_STAR );
// X1^T[MR,* ] := X1^T[MR,* ] U11^-T[* ,* ]
// = (U11^-1[* ,* ] X1[* ,MR])^T
LocalTrsm
( RIGHT, UPPER, TRANSPOSE, diag,
F(1), U11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );
Transpose( X1Trans_MR_STAR, X1 );
U01_MC_STAR.AlignWith( X0 );
U01_MC_STAR = U01; // U01[MC,* ] <- U01[MC,MR]
// X0[MC,MR] -= U01[MC,* ] X1[* ,MR]
LocalGemm
( NORMAL, TRANSPOSE, F(-1), U01_MC_STAR, X1Trans_MR_STAR, F(1), X0 );
}
}
示例3: Blocksize
void LLNMedium
( const AbstractDistMatrix<F>& LPre,
AbstractDistMatrix<F>& XPre,
bool checkIfSingular )
{
DEBUG_CSE
const Int m = XPre.Height();
const Int bsize = Blocksize();
const Grid& g = LPre.Grid();
DistMatrixReadProxy<F,F,MC,MR> LProx( LPre );
DistMatrixReadWriteProxy<F,F,MC,MR> XProx( XPre );
auto& L = LProx.GetLocked();
auto& X = XProx.Get();
DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
DistMatrix<F,MC, STAR> L21_MC_STAR(g);
DistMatrix<F,MR, STAR> X1Trans_MR_STAR(g);
for( Int k=0; k<m; k+=bsize )
{
const Int nbProp = Min(bsize,m-k);
const bool in2x2 = ( k+nbProp<m && L.Get(k+nbProp-1,k+nbProp) != F(0) );
const Int nb = ( in2x2 ? nbProp+1 : nbProp );
const Range<Int> ind1( k, k+nb ),
ind2( k+nb, m );
auto L11 = L( ind1, ind1 );
auto L21 = L( ind2, ind1 );
auto X1 = X( ind1, ALL );
auto X2 = X( ind2, ALL );
L11_STAR_STAR = L11; // L11[* ,* ] <- L11[MC,MR]
X1Trans_MR_STAR.AlignWith( X2 );
Transpose( X1, X1Trans_MR_STAR );
// X1^T[MR,* ] := X1^T[MR,* ] L11^-T[* ,* ]
// = (L11^-1[* ,* ] X1[* ,MR])^T
LocalQuasiTrsm
( RIGHT, LOWER, TRANSPOSE,
F(1), L11_STAR_STAR, X1Trans_MR_STAR, checkIfSingular );
Transpose( X1Trans_MR_STAR, X1 );
L21_MC_STAR.AlignWith( X2 );
L21_MC_STAR = L21; // L21[MC,* ] <- L21[MC,MR]
// X2[MC,MR] -= L21[MC,* ] X1[* ,MR]
LocalGemm
( NORMAL, TRANSPOSE, F(-1), L21_MC_STAR, X1Trans_MR_STAR, F(1), X2 );
}
}
示例4: AProx
void SUMMA_NTDot
( Orientation orientB,
T alpha,
const AbstractDistMatrix<T>& APre,
const AbstractDistMatrix<T>& BPre,
AbstractDistMatrix<T>& CPre,
Int blockSize=2000 )
{
EL_DEBUG_CSE
const Int m = CPre.Height();
const Int n = CPre.Width();
const Grid& g = APre.Grid();
DistMatrixReadProxy<T,T,STAR,VC> AProx( APre );
auto& A = AProx.GetLocked();
ElementalProxyCtrl BCtrl;
BCtrl.rowConstrain = true;
BCtrl.rowAlign = A.RowAlign();
DistMatrixReadProxy<T,T,STAR,VC> BProx( BPre, BCtrl );
auto& B = BProx.GetLocked();
DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
auto& C = CProx.Get();
DistMatrix<T,STAR,STAR> C11_STAR_STAR(g);
for( Int kOuter=0; kOuter<m; kOuter+=blockSize )
{
const Int nbOuter = Min(blockSize,m-kOuter);
const Range<Int> indOuter( kOuter, kOuter+nbOuter );
auto A1 = A( indOuter, ALL );
for( Int kInner=0; kInner<n; kInner+=blockSize )
{
const Int nbInner = Min(blockSize,n-kInner);
const Range<Int> indInner( kInner, kInner+nbInner );
auto B1 = B( indInner, ALL );
auto C11 = C( indOuter, indInner );
LocalGemm( NORMAL, orientB, alpha, A1, B1, C11_STAR_STAR );
AxpyContract( T(1), C11_STAR_STAR, C11 );
}
}
}
示例5: AProx
void LT_Dot
( T alpha,
const AbstractDistMatrix<T>& APre,
AbstractDistMatrix<T>& CPre,
const bool conjugate,
Int blockSize=2000 )
{
EL_DEBUG_CSE
const Int n = CPre.Height();
const Grid& g = APre.Grid();
const Orientation orient = ( conjugate ? ADJOINT : TRANSPOSE );
DistMatrixReadProxy<T,T,VC,STAR> AProx( APre );
auto& A = AProx.GetLocked();
DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
auto& C = CProx.Get();
DistMatrix<T,STAR,STAR> Z( blockSize, blockSize, g );
Zero( Z );
for( Int kOuter=0; kOuter<n; kOuter+=blockSize )
{
const Int nbOuter = Min(blockSize,n-kOuter);
const Range<Int> indOuter( kOuter, kOuter+nbOuter );
auto A1 = A( ALL, indOuter );
auto C11 = C( indOuter, indOuter );
Z.Resize( nbOuter, nbOuter );
Syrk( LOWER, TRANSPOSE, alpha, A1.Matrix(), Z.Matrix(), conjugate );
AxpyContract( T(1), Z, C11 );
for( Int kInner=kOuter+nbOuter; kInner<n; kInner+=blockSize )
{
const Int nbInner = Min(blockSize,n-kInner);
const Range<Int> indInner( kInner, kInner+nbInner );
auto A2 = A( ALL, indInner );
auto C21 = C( indInner, indOuter );
LocalGemm( orient, NORMAL, alpha, A1, A2, Z );
AxpyContract( T(1), Z, C21 );
}
}
}
示例6: Blocksize
void SUMMA_NTC
( Orientation orientB,
T alpha,
const AbstractDistMatrix<T>& APre,
const AbstractDistMatrix<T>& BPre,
AbstractDistMatrix<T>& CPre )
{
EL_DEBUG_CSE
const Int sumDim = APre.Width();
const Int bsize = Blocksize();
const Grid& g = APre.Grid();
const bool conjugate = ( orientB == ADJOINT );
DistMatrixReadProxy<T,T,MC,MR> AProx( APre );
DistMatrixReadProxy<T,T,MC,MR> BProx( BPre );
DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
auto& A = AProx.GetLocked();
auto& B = BProx.GetLocked();
auto& C = CProx.Get();
// Temporary distributions
DistMatrix<T,MC,STAR> A1_MC_STAR(g);
DistMatrix<T,VR,STAR> B1_VR_STAR(g);
DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
A1_MC_STAR.AlignWith( C );
B1_VR_STAR.AlignWith( C );
B1Trans_STAR_MR.AlignWith( C );
for( Int k=0; k<sumDim; k+=bsize )
{
const Int nb = Min(bsize,sumDim-k);
auto A1 = A( ALL, IR(k,k+nb) );
auto B1 = B( ALL, IR(k,k+nb) );
A1_MC_STAR = A1;
B1_VR_STAR = B1;
Transpose( B1_VR_STAR, B1Trans_STAR_MR, conjugate );
// C[MC,MR] += alpha A1[MC,*] (B1[MR,*])^T
LocalGemm
( NORMAL, NORMAL, alpha, A1_MC_STAR, B1Trans_STAR_MR, T(1), C );
}
}
示例7: Blocksize
void SUMMA_TNA
( Orientation orientA,
T alpha,
const AbstractDistMatrix<T>& APre,
const AbstractDistMatrix<T>& BPre,
AbstractDistMatrix<T>& CPre )
{
DEBUG_CSE
const Int n = CPre.Width();
const Int bsize = Blocksize();
const Grid& g = APre.Grid();
DistMatrixReadProxy<T,T,MC,MR> AProx( APre );
DistMatrixReadProxy<T,T,MC,MR> BProx( BPre );
DistMatrixReadWriteProxy<T,T,MC,MR> CProx( CPre );
auto& A = AProx.GetLocked();
auto& B = BProx.GetLocked();
auto& C = CProx.Get();
// Temporary distributions
DistMatrix<T,MC,STAR> B1_MC_STAR(g);
DistMatrix<T,MR,STAR> D1_MR_STAR(g);
DistMatrix<T,MR,MC > D1_MR_MC(g);
B1_MC_STAR.AlignWith( A );
D1_MR_STAR.AlignWith( A );
for( Int k=0; k<n; k+=bsize )
{
const Int nb = Min(bsize,n-k);
auto B1 = B( ALL, IR(k,k+nb) );
auto C1 = C( ALL, IR(k,k+nb) );
// D1[MR,*] := alpha (A1[MC,MR])^T B1[MC,*]
// = alpha (A1^T)[MR,MC] B1[MC,*]
B1_MC_STAR = B1;
LocalGemm( orientA, NORMAL, alpha, A, B1_MC_STAR, D1_MR_STAR );
// C1[MC,MR] += scattered & transposed D1[MR,*] summed over grid cols
Contract( D1_MR_STAR, D1_MR_MC );
Axpy( T(1), D1_MR_MC, C1 );
}
}
示例8: D
void BackwardSingle
( const DistMatrix<F,VC,STAR>& L,
DistMatrix<F,VC,STAR>& X,
bool conjugate=false )
{
const Grid& g = L.Grid();
const Orientation orientation = ( conjugate ? ADJOINT : TRANSPOSE );
DistMatrix<F,STAR,STAR> D(g), L11_STAR_STAR(g), Z1_STAR_STAR(g);
FormDiagonalBlocks( L, D, conjugate );
const Int m = L.Height();
const Int n = L.Width();
const Int numRHS = X.Width();
const Int bsize = Blocksize();
const Int kLast = LastOffset( n, bsize );
for( Int k=kLast; k>=0; k-=bsize )
{
const Int nb = Min(bsize,n-k);
const Range<Int> ind1(k,k+nb), ind2(k+nb,m);
auto L11Trans_STAR_STAR = D( IR(0,nb), ind1 );
auto L21 = L( ind2, ind1 );
auto X1 = X( ind1, IR(0,numRHS) );
auto X2 = X( ind2, IR(0,numRHS) );
// X1 -= L21' X2
LocalGemm( orientation, NORMAL, F(-1), L21, X2, Z1_STAR_STAR );
axpy::util::UpdateWithLocalData( F(1), X1, Z1_STAR_STAR );
El::AllReduce( Z1_STAR_STAR, X1.DistComm() );
// X1 := L11^-1 X1
LocalTrsm
( LEFT, UPPER, NORMAL, UNIT, F(1), L11Trans_STAR_STAR, Z1_STAR_STAR );
X1 = Z1_STAR_STAR;
}
}
示例9: PushCallStack
inline void
ApplyPackedReflectorsLUVF
( int offset,
const DistMatrix<R>& H,
DistMatrix<R>& A )
{
#ifndef RELEASE
PushCallStack("internal::ApplyPackedReflectorsLUVF");
if( H.Grid() != A.Grid() )
throw std::logic_error("{H,A} must be distributed over the same grid");
if( offset < 0 || offset > H.Height() )
throw std::logic_error("Transforms out of bounds");
if( H.Width() != A.Height() )
throw std::logic_error
("Width of transforms must equal height of target matrix");
#endif
const Grid& g = H.Grid();
DistMatrix<R>
HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g),
HBL(g), HBR(g), H10(g), H11(g), H12(g),
H20(g), H21(g), H22(g);
DistMatrix<R>
AT(g), A0(g), ATop(g),
AB(g), A1(g),
A2(g);
DistMatrix<R> HPanCopy(g);
DistMatrix<R,VC, STAR> HPan_VC_STAR(g);
DistMatrix<R,MC, STAR> HPan_MC_STAR(g);
DistMatrix<R,STAR,STAR> SInv_STAR_STAR(g);
DistMatrix<R,STAR,MR > Z_STAR_MR(g);
DistMatrix<R,STAR,VR > Z_STAR_VR(g);
LockedPartitionDownDiagonal
( H, HTL, HTR,
HBL, HBR, 0 );
PartitionDown
( A, AT,
AB, 0 );
while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
{
LockedRepartitionDownDiagonal
( HTL, /**/ HTR, H00, /**/ H01, H02,
/*************/ /******************/
/**/ H10, /**/ H11, H12,
HBL, /**/ HBR, H20, /**/ H21, H22 );
const int HPanHeight = H01.Height() + H11.Height();
const int HPanOffset =
std::min( H11.Width(), std::max(offset-H00.Width(),0) );
const int HPanWidth = H11.Width()-HPanOffset;
HPan.LockedView( H, 0, H00.Width()+HPanOffset, HPanHeight, HPanWidth );
RepartitionDown
( AT, A0,
/**/ /**/
A1,
AB, A2 );
ATop.View2x1( A0,
A1 );
HPan_MC_STAR.AlignWith( ATop );
Z_STAR_MR.AlignWith( ATop );
Z_STAR_VR.AlignWith( ATop );
Zeros( HPan.Width(), ATop.Width(), Z_STAR_MR );
Zeros( HPan.Width(), HPan.Width(), SInv_STAR_STAR );
//--------------------------------------------------------------------//
HPanCopy = HPan;
MakeTrapezoidal( RIGHT, UPPER, offset, HPanCopy );
SetDiagonalToOne( RIGHT, offset, HPanCopy );
HPan_VC_STAR = HPanCopy;
Syrk
( LOWER, TRANSPOSE,
R(1), HPan_VC_STAR.LockedLocalMatrix(),
R(0), SInv_STAR_STAR.LocalMatrix() );
SInv_STAR_STAR.SumOverGrid();
HalveMainDiagonal( SInv_STAR_STAR );
HPan_MC_STAR = HPanCopy;
LocalGemm
( TRANSPOSE, NORMAL, R(1), HPan_MC_STAR, ATop, R(0), Z_STAR_MR );
Z_STAR_VR.SumScatterFrom( Z_STAR_MR );
LocalTrsm
( LEFT, LOWER, NORMAL, NON_UNIT,
R(1), SInv_STAR_STAR, Z_STAR_VR );
Z_STAR_MR = Z_STAR_VR;
LocalGemm( NORMAL, NORMAL, R(-1), HPan_MC_STAR, Z_STAR_MR, R(1), ATop );
//--------------------------------------------------------------------//
HPan_MC_STAR.FreeAlignments();
Z_STAR_MR.FreeAlignments();
Z_STAR_VR.FreeAlignments();
SlideLockedPartitionDownDiagonal
( HTL, /**/ HTR, H00, H01, /**/ H02,
/**/ H10, H11, /**/ H12,
/*************/ /******************/
//.........这里部分代码省略.........
示例10: PushCallStack
inline void
TrsmLLTSmall
( Orientation orientation, UnitOrNonUnit diag,
F alpha, const DistMatrix<F,STAR,VR>& L, DistMatrix<F,VR,STAR>& X,
bool checkIfSingular )
{
#ifndef RELEASE
PushCallStack("internal::TrsmLLTSmall");
if( L.Grid() != X.Grid() )
throw std::logic_error
("L and X must be distributed over the same grid");
if( orientation == NORMAL )
throw std::logic_error("TrsmLLT expects a (Conjugate)Transpose option");
if( L.Height() != L.Width() || L.Height() != X.Height() )
{
std::ostringstream msg;
msg << "Nonconformal TrsmLLT: \n"
<< " L ~ " << L.Height() << " x " << L.Width() << "\n"
<< " X ~ " << X.Height() << " x " << X.Width() << "\n";
throw std::logic_error( msg.str().c_str() );
}
if( L.RowAlignment() != X.ColAlignment() )
throw std::logic_error("L and X must be aligned");
#endif
const Grid& g = L.Grid();
// Matrix views
DistMatrix<F,STAR,VR>
LTL(g), LTR(g), L00(g), L01(g), L02(g),
LBL(g), LBR(g), L10(g), L11(g), L12(g),
L20(g), L21(g), L22(g);
DistMatrix<F,VR,STAR> XT(g), X0(g),
XB(g), X1(g),
X2(g);
// Temporary distributions
DistMatrix<F,STAR,STAR> L11_STAR_STAR(g);
DistMatrix<F,STAR,STAR> X1_STAR_STAR(g);
// Start the algorithm
Scale( alpha, X );
LockedPartitionUpDiagonal
( L, LTL, LTR,
LBL, LBR, 0 );
PartitionUp
( X, XT,
XB, 0 );
while( XT.Height() > 0 )
{
LockedRepartitionUpDiagonal
( LTL, /**/ LTR, L00, L01, /**/ L02,
/**/ L10, L11, /**/ L12,
/*************/ /******************/
LBL, /**/ LBR, L20, L21, /**/ L22 );
RepartitionUp
( XT, X0,
X1,
/**/ /**/
XB, X2 );
//--------------------------------------------------------------------//
L11_STAR_STAR = L11; // L11[* ,* ] <- L11[* ,VR]
X1_STAR_STAR = X1; // X1[* ,* ] <- X1[VR,* ]
// X1[* ,* ] := L11^-[T/H][* ,* ] X1[* ,* ]
LocalTrsm
( LEFT, LOWER, orientation, diag,
F(1), L11_STAR_STAR, X1_STAR_STAR, checkIfSingular );
X1 = X1_STAR_STAR;
// X0[VR,* ] -= L10[* ,VR]^(T/H) X1[* ,* ]
LocalGemm( orientation, NORMAL, F(-1), L10, X1_STAR_STAR, F(1), X0 );
//--------------------------------------------------------------------//
SlideLockedPartitionUpDiagonal
( LTL, /**/ LTR, L00, /**/ L01, L02,
/*************/ /******************/
/**/ L10, /**/ L11, L12,
LBL, /**/ LBR, L20, /**/ L21, L22 );
SlidePartitionUp
( XT, X0,
/**/ /**/
X1,
XB, X2 );
}
#ifndef RELEASE
PopCallStack();
#endif
}
示例11: HPDInverseLVar2
inline void
HPDInverseLVar2( DistMatrix<F>& A )
{
#ifndef RELEASE
PushCallStack("internal::HPDInverseLVar2");
if( A.Height() != A.Width() )
throw std::logic_error("Nonsquare matrices cannot be triangular");
#endif
const Grid& g = A.Grid();
// Matrix views
DistMatrix<F>
ATL(g), ATR(g), A00(g), A01(g), A02(g),
ABL(g), ABR(g), A10(g), A11(g), A12(g),
A20(g), A21(g), A22(g);
// Temporary distributions
DistMatrix<F,STAR,STAR> A11_STAR_STAR(g);
DistMatrix<F,STAR,VR > A10_STAR_VR(g);
DistMatrix<F,VC, STAR> A21_VC_STAR(g);
DistMatrix<F,STAR,MC > A10_STAR_MC(g);
DistMatrix<F,STAR,MR > A10_STAR_MR(g);
DistMatrix<F,STAR,MC > A21Trans_STAR_MC(g);
DistMatrix<F,VR, STAR> A21_VR_STAR(g);
DistMatrix<F,STAR,MR > A21Adj_STAR_MR(g);
// Start the algorithm
PartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
while( ATL.Height() < A.Height() )
{
RepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ A01, A02,
/*************/ /******************/
/**/ A10, /**/ A11, A12,
ABL, /**/ ABR, A20, /**/ A21, A22 );
A10_STAR_VR.AlignWith( A00 );
A21_VC_STAR.AlignWith( A20 );
A10_STAR_MC.AlignWith( A00 );
A10_STAR_MR.AlignWith( A00 );
A21Trans_STAR_MC.AlignWith( A20 );
A21_VR_STAR.AlignWith( A22 );
A21Adj_STAR_MR.AlignWith( A22 );
//--------------------------------------------------------------------//
A11_STAR_STAR = A11;
LocalCholesky( LOWER, A11_STAR_STAR );
A10_STAR_VR = A10;
LocalTrsm
( LEFT, LOWER, NORMAL, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR );
A21_VC_STAR = A21;
LocalTrsm
( RIGHT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A21_VC_STAR );
A10_STAR_MC = A10_STAR_VR;
A10_STAR_MR = A10_STAR_VR;
LocalTrrk
( LOWER, ADJOINT,
F(1), A10_STAR_MC, A10_STAR_MR, F(1), A00 );
A21Trans_STAR_MC.TransposeFrom( A21_VC_STAR );
LocalGemm
( TRANSPOSE, NORMAL, F(-1), A21Trans_STAR_MC, A10_STAR_MR, F(1), A20 );
A21_VR_STAR = A21_VC_STAR;
A21Adj_STAR_MR.AdjointFrom( A21_VR_STAR );
LocalTrrk
( LOWER, TRANSPOSE,
F(-1), A21Trans_STAR_MC, A21Adj_STAR_MR, F(1), A22 );
LocalTrsm
( LEFT, LOWER, ADJOINT, NON_UNIT, F(1), A11_STAR_STAR, A10_STAR_VR );
LocalTrsm
( RIGHT, LOWER, NORMAL, NON_UNIT, F(-1), A11_STAR_STAR, A21_VC_STAR );
LocalTriangularInverse( LOWER, NON_UNIT, A11_STAR_STAR );
LocalTrtrmm( ADJOINT, LOWER, A11_STAR_STAR );
A11 = A11_STAR_STAR;
A10 = A10_STAR_VR;
A21 = A21_VC_STAR;
//--------------------------------------------------------------------//
A10_STAR_VR.FreeAlignments();
A21_VC_STAR.FreeAlignments();
A10_STAR_MC.FreeAlignments();
A10_STAR_MR.FreeAlignments();
A21Trans_STAR_MC.FreeAlignments();
A21_VR_STAR.FreeAlignments();
A21Adj_STAR_MR.FreeAlignments();
SlidePartitionDownDiagonal
( ATL, /**/ ATR, A00, A01, /**/ A02,
/**/ A10, A11, /**/ A12,
/*************/ /******************/
ABL, /**/ ABR, A20, A21, /**/ A22 );
//.........这里部分代码省略.........
示例12: entry
inline void
TrmmLLNC
( UnitOrNonUnit diag,
T alpha, const DistMatrix<T>& L,
DistMatrix<T>& X )
{
#ifndef RELEASE
CallStackEntry entry("internal::TrmmLLNC");
if( L.Grid() != X.Grid() )
throw std::logic_error
("L and X must be distributed over the same grid");
if( L.Height() != L.Width() || L.Width() != X.Height() )
{
std::ostringstream msg;
msg << "Nonconformal TrmmLLNC: \n"
<< " L ~ " << L.Height() << " x " << L.Width() << "\n"
<< " X ~ " << X.Height() << " x " << X.Width() << "\n";
throw std::logic_error( msg.str().c_str() );
}
#endif
const Grid& g = L.Grid();
// Matrix views
DistMatrix<T>
LTL(g), LTR(g), L00(g), L01(g), L02(g),
LBL(g), LBR(g), L10(g), L11(g), L12(g),
L20(g), L21(g), L22(g);
DistMatrix<T> XT(g), X0(g),
XB(g), X1(g),
X2(g);
// Temporary distributions
DistMatrix<T,MC, STAR> L21_MC_STAR(g);
DistMatrix<T,STAR,STAR> L11_STAR_STAR(g);
DistMatrix<T,STAR,VR > X1_STAR_VR(g);
DistMatrix<T,MR, STAR> X1Trans_MR_STAR(g);
// Start the algorithm
Scale( alpha, X );
LockedPartitionUpDiagonal
( L, LTL, LTR,
LBL, LBR, 0 );
PartitionUp
( X, XT,
XB, 0 );
while( XT.Height() > 0 )
{
LockedRepartitionUpDiagonal
( LTL, /**/ LTR, L00, L01, /**/ L02,
/**/ L10, L11, /**/ L12,
/*************/ /******************/
LBL, /**/ LBR, L20, L21, /**/ L22 );
RepartitionUp
( XT, X0,
X1,
/**/ /**/
XB, X2 );
L21_MC_STAR.AlignWith( X2 );
X1Trans_MR_STAR.AlignWith( X2 );
X1_STAR_VR.AlignWith( X1 );
//--------------------------------------------------------------------//
L21_MC_STAR = L21;
X1Trans_MR_STAR.TransposeFrom( X1 );
LocalGemm
( NORMAL, TRANSPOSE, T(1), L21_MC_STAR, X1Trans_MR_STAR, T(1), X2 );
L11_STAR_STAR = L11;
X1_STAR_VR.TransposeFrom( X1Trans_MR_STAR );
LocalTrmm( LEFT, LOWER, NORMAL, diag, T(1), L11_STAR_STAR, X1_STAR_VR );
X1 = X1_STAR_VR;
//--------------------------------------------------------------------//
L21_MC_STAR.FreeAlignments();
X1Trans_MR_STAR.FreeAlignments();
X1_STAR_VR.FreeAlignments();
SlideLockedPartitionUpDiagonal
( LTL, /**/ LTR, L00, /**/ L01, L02,
/*************/ /******************/
/**/ L10, /**/ L11, L12,
LBL, /**/ LBR, L20, /**/ L21, L22 );
SlidePartitionUp
( XT, X0,
/**/ /**/
X1,
XB, X2 );
}
}
示例13: UVar3
inline void
UVar3( UnitOrNonUnit diag, DistMatrix<F>& U )
{
#ifndef RELEASE
CallStackEntry entry("triangular_inverse::UVar3");
if( U.Height() != U.Width() )
LogicError("Nonsquare matrices cannot be triangular");
#endif
const Grid& g = U.Grid();
// Matrix views
DistMatrix<F>
UTL(g), UTR(g), U00(g), U01(g), U02(g),
UBL(g), UBR(g), U10(g), U11(g), U12(g),
U20(g), U21(g), U22(g);
// Temporary distributions
DistMatrix<F,VC, STAR> U01_VC_STAR(g);
DistMatrix<F,STAR,STAR> U11_STAR_STAR(g);
DistMatrix<F,STAR,VR > U12_STAR_VR(g);
DistMatrix<F,STAR,MC > U01Trans_STAR_MC(g);
DistMatrix<F,MR, STAR> U12Trans_MR_STAR(g);
// Start the algorithm
PartitionUpDiagonal
( U, UTL, UTR,
UBL, UBR, 0 );
while( UBR.Height() < U.Height() )
{
RepartitionUpDiagonal
( UTL, /**/ UTR, U00, U01, /**/ U02,
/**/ U10, U11, /**/ U12,
/*************/ /******************/
UBL, /**/ UBR, U20, U21, /**/ U22 );
U01Trans_STAR_MC.AlignWith( U02 );
U12Trans_MR_STAR.AlignWith( U02 );
//--------------------------------------------------------------------//
U01_VC_STAR = U01;
U11_STAR_STAR = U11;
LocalTrsm
( RIGHT, UPPER, NORMAL, diag, F(-1), U11_STAR_STAR, U01_VC_STAR );
// We transpose before the communication to avoid cache-thrashing
// in the unpacking stage.
U12Trans_MR_STAR.TransposeFrom( U12 );
U01Trans_STAR_MC.TransposeFrom( U01_VC_STAR );
LocalGemm
( TRANSPOSE, TRANSPOSE,
F(1), U01Trans_STAR_MC, U12Trans_MR_STAR, F(1), U02 );
U01.TransposeFrom( U01Trans_STAR_MC );
U12_STAR_VR.TransposeFrom( U12Trans_MR_STAR );
LocalTrsm
( LEFT, UPPER, NORMAL, diag, F(1), U11_STAR_STAR, U12_STAR_VR );
LocalTriangularInverse( UPPER, diag, U11_STAR_STAR );
U11 = U11_STAR_STAR;
U12 = U12_STAR_VR;
//--------------------------------------------------------------------//
SlidePartitionUpDiagonal
( UTL, /**/ UTR, U00, /**/ U01, U02,
/*************/ /******************/
/**/ U10, /**/ U11, U12,
UBL, /**/ UBR, U20, /**/ U21, U22 );
}
}
示例14: PushCallStack
//.........这里部分代码省略.........
LockedPartitionDownDiagonal
( A, ATL, ATR,
ABL, ABR, 0 );
LockedPartitionDown
( B_MC_STAR, BT_MC_STAR,
BB_MC_STAR, 0 );
LockedPartitionRight
( BAdjOrTrans_STAR_MR, BLAdjOrTrans_STAR_MR, BRAdjOrTrans_STAR_MR, 0 );
PartitionDown
( Z_MC_STAR, ZT_MC_STAR,
ZB_MC_STAR, 0 );
PartitionDown
( Z_MR_STAR, ZT_MR_STAR,
ZB_MR_STAR, 0 );
while( ATL.Height() < A.Height() )
{
LockedRepartitionDownDiagonal
( ATL, /**/ ATR, A00, /**/ A01, A02,
/************/ /******************/
/**/ A10, /**/ A11, A12,
ABL, /**/ ABR, A20, /**/ A21, A22 );
LockedRepartitionDown
( BT_MC_STAR, B0_MC_STAR,
/**********/ /**********/
B1_MC_STAR,
BB_MC_STAR, B2_MC_STAR );
LockedRepartitionRight
( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR,
B0AdjOrTrans_STAR_MR, /**/ B1AdjOrTrans_STAR_MR,
B2AdjOrTrans_STAR_MR );
RepartitionDown
( ZT_MC_STAR, Z0_MC_STAR,
/**********/ /**********/
Z1_MC_STAR,
ZB_MC_STAR, Z2_MC_STAR );
RepartitionDown
( ZT_MR_STAR, Z0_MR_STAR,
/**********/ /**********/
Z1_MR_STAR,
ZB_MR_STAR, Z2_MR_STAR );
D11.AlignWith( A11 );
//--------------------------------------------------------------------//
D11 = A11;
MakeTrapezoidal( LEFT, UPPER, 0, D11 );
LocalGemm
( NORMAL, orientation,
alpha, D11, B1AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR );
MakeTrapezoidal( LEFT, UPPER, 1, D11 );
LocalGemm
( orientation, NORMAL, alpha, D11, B1_MC_STAR, T(1), Z1_MR_STAR );
LocalGemm
( NORMAL, orientation,
alpha, A12, B2AdjOrTrans_STAR_MR, T(1), Z1_MC_STAR );
LocalGemm
( orientation, NORMAL, alpha, A12, B1_MC_STAR, T(1), Z2_MR_STAR );
//--------------------------------------------------------------------//
D11.FreeAlignments();
SlideLockedPartitionDownDiagonal
( ATL, /**/ ATR, A00, A01, /**/ A02,
/**/ A10, A11, /**/ A12,
/*************/ /******************/
ABL, /**/ ABR, A20, A21, /**/ A22 );
SlideLockedPartitionDown
( BT_MC_STAR, B0_MC_STAR,
B1_MC_STAR,
/**********/ /**********/
BB_MC_STAR, B2_MC_STAR );
SlideLockedPartitionRight
( BLAdjOrTrans_STAR_MR, /**/ BRAdjOrTrans_STAR_MR,
B0AdjOrTrans_STAR_MR, B1AdjOrTrans_STAR_MR, /**/ B2AdjOrTrans_STAR_MR );
SlidePartitionDown
( ZT_MC_STAR, Z0_MC_STAR,
Z1_MC_STAR,
/**********/ /**********/
ZB_MC_STAR, Z2_MC_STAR );
SlidePartitionDown
( ZT_MR_STAR, Z0_MR_STAR,
Z1_MR_STAR,
/**********/ /**********/
ZB_MR_STAR, Z2_MR_STAR );
}
PopBlocksizeStack();
#ifndef RELEASE
PopCallStack();
#endif
}
示例15: cse
inline void
RUVF
( Conjugation conjugation, Int offset,
const DistMatrix<F>& H, const DistMatrix<F,MD,STAR>& t, DistMatrix<F>& A )
{
#ifndef RELEASE
CallStackEntry cse("apply_packed_reflectors::RUVF");
if( H.Grid() != t.Grid() || t.Grid() != A.Grid() )
LogicError("{H,t,A} must be distributed over the same grid");
// TODO: Proper dimension checks
if( t.Height() != H.DiagonalLength(offset) )
LogicError("t must be the same length as H's offset diag");
if( !t.AlignedWithDiagonal( H, offset ) )
LogicError("t must be aligned with H's 'offset' diagonal");
#endif
const Grid& g = H.Grid();
DistMatrix<F>
HTL(g), HTR(g), H00(g), H01(g), H02(g), HPan(g), HPanCopy(g),
HBL(g), HBR(g), H10(g), H11(g), H12(g),
H20(g), H21(g), H22(g);
DistMatrix<F> ALeft(g);
DistMatrix<F,MD,STAR>
tT(g), t0(g),
tB(g), t1(g),
t2(g);
DistMatrix<F,VC, STAR> HPan_VC_STAR(g);
DistMatrix<F,MR, STAR> HPan_MR_STAR(g);
DistMatrix<F,STAR,STAR> t1_STAR_STAR(g);
DistMatrix<F,STAR,STAR> SInv_STAR_STAR(g);
DistMatrix<F,STAR,MC > ZAdj_STAR_MC(g);
DistMatrix<F,STAR,VC > ZAdj_STAR_VC(g);
LockedPartitionDownOffsetDiagonal
( offset,
H, HTL, HTR,
HBL, HBR, 0 );
LockedPartitionDown
( t, tT,
tB, 0 );
while( HTL.Height() < H.Height() && HTL.Width() < H.Width() )
{
LockedRepartitionDownDiagonal
( HTL, /**/ HTR, H00, /**/ H01, H02,
/*************/ /******************/
/**/ H10, /**/ H11, H12,
HBL, /**/ HBR, H20, /**/ H21, H22 );
LockedRepartitionDown
( tT, t0,
/**/ /**/
t1,
tB, t2 );
LockedView2x1( HPan, H01, H11 );
View( ALeft, A, 0, 0, A.Height(), HPan.Height() );
HPan_MR_STAR.AlignWith( ALeft );
ZAdj_STAR_MC.AlignWith( ALeft );
ZAdj_STAR_VC.AlignWith( ALeft );
//--------------------------------------------------------------------//
HPanCopy = HPan;
MakeTrapezoidal( UPPER, HPanCopy, 0, RIGHT );
SetDiagonal( HPanCopy, F(1), 0, RIGHT );
HPan_VC_STAR = HPanCopy;
Zeros( SInv_STAR_STAR, HPan.Width(), HPan.Width() );
Herk
( UPPER, ADJOINT,
F(1), HPan_VC_STAR.LockedMatrix(),
F(0), SInv_STAR_STAR.Matrix() );
SInv_STAR_STAR.SumOverGrid();
t1_STAR_STAR = t1;
FixDiagonal( conjugation, t1_STAR_STAR, SInv_STAR_STAR );
HPan_MR_STAR = HPan_VC_STAR;
LocalGemm( ADJOINT, ADJOINT, F(1), HPan_MR_STAR, ALeft, ZAdj_STAR_MC );
ZAdj_STAR_VC.SumScatterFrom( ZAdj_STAR_MC );
LocalTrsm
( LEFT, UPPER, ADJOINT, NON_UNIT,
F(1), SInv_STAR_STAR, ZAdj_STAR_VC );
ZAdj_STAR_MC = ZAdj_STAR_VC;
LocalGemm
( ADJOINT, ADJOINT, F(-1), ZAdj_STAR_MC, HPan_MR_STAR, F(1), ALeft );
//--------------------------------------------------------------------//
SlideLockedPartitionDownDiagonal
( HTL, /**/ HTR, H00, H01, /**/ H02,
/**/ H10, H11, /**/ H12,
/*************/ /******************/
HBL, /**/ HBR, H20, H21, /**/ H22 );
SlideLockedPartitionDown
( tT, t0,
t1,
/**/ /**/
tB, t2 );
}
//.........这里部分代码省略.........