本文整理汇总了C++中MAGMA_Z_REAL函数的典型用法代码示例。如果您正苦于以下问题:C++ MAGMA_Z_REAL函数的具体用法?C++ MAGMA_Z_REAL怎么用?C++ MAGMA_Z_REAL使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MAGMA_Z_REAL函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: SVDMatrix_magma
void SVDMatrix_magma(Tensor_core<complex<double>,2>& U, Tensor_core<double,1>& D, Tensor_core<complex<double>,2>& V)
{
if( U.rank(0)!=U.rank(1) || U.rank(1)!=D.rank(0) || D.rank(0)!=V.rank(0) || V.rank(0)!=V.rank(1) )
{
cout<<"Size is not consistent in SVDMatrix_magma! Only support square matrix."<<endl;
exit(1);
}
magma_int_t m=U.rank(0); magma_int_t n=V.rank(0);
magma_vec_t jobz(MagmaOverwriteVec); magma_int_t lda=m;
magmaDoubleComplex* u=nullptr; magma_int_t ldu=1; magma_int_t ldv=n;
magmaDoubleComplex work_test[1]; magma_int_t lwork=-1;
double* rwork; magma_int_t* iwork;
magma_dmalloc_cpu( &rwork, 5*m*m+7*m ); magma_imalloc_cpu(&iwork, 8*m);
magma_int_t info;
magma_zgesdd(jobz, m, n, (magmaDoubleComplex *) U.data(), lda, D.data(), u, ldu, (magmaDoubleComplex *) V.data(), ldv,
work_test, lwork, rwork, iwork, &info);
lwork=lround( MAGMA_Z_REAL(work_test[0]) );
magmaDoubleComplex* work; magma_zmalloc_cpu(&work, lwork);
magma_zgesdd(jobz, m, n, (magmaDoubleComplex *) U.data(), lda, D.data(), u, ldu, (magmaDoubleComplex *) V.data(), ldv,
work, lwork, rwork, iwork, &info);
magma_free_cpu(work); magma_free_cpu(rwork); magma_free_cpu(iwork);
if(info!=0)
{
cout<<"SVDMatrix_magma is not suceesful, info= "<<info<<endl;
exit(1);
}
}
示例2: QRMatrix_magma
double QRMatrix_magma(Tensor_core<complex<double>,2>& ph, Tensor_core<double,1>& det_list)
{
if( det_list.rank(0) != ph.rank(1) ) {cout<<"det_list size is not consistent with ph!"<<endl; exit(1); }
magma_int_t L=ph.rank(0); magma_int_t N=ph.rank(1); magma_int_t info;
int L_cpu = L; int N_cpu = N;
magmaDoubleComplex* tau; magma_zmalloc_cpu( &tau, N );
magmaDoubleComplex work_test[1]; magma_int_t lwork=-1;
magma_zgeqrf(L, N, (magmaDoubleComplex *)ph.data(), L, tau, work_test, lwork, &info);
lwork=lround( MAGMA_Z_REAL(work_test[0]) );
magmaDoubleComplex* work; magma_zmalloc_cpu( &work, lwork );
magma_zgeqrf(L, N, (magmaDoubleComplex *)ph.data(), L, tau, work, lwork, &info);
if(info!=0) {cout<<"QR run is not suceesful: "<<info<<"-th parameter is illegal!"<<endl; exit(1);}
complex<double> det={1.0,0.0};
for (int i=0; i<N_cpu; i++) {det_list(i)=ph(i,i).real(); det*=ph(i,i);}
magma_zungqr2(L, N, N, (magmaDoubleComplex *)ph.data(), L, tau, &info );
if(info!=0) {cout<<"magma_zungqr2 run is not suceesful: "<<info<<"-th parameter is illegal!"<<endl; exit(1);}
//Reshape the phi to get positive det
if(det.real()<0)
{
det=-det;
det_list(0)=-det_list(0);
for(int i=0; i<L_cpu; i++) ph(i,0)=-ph(i,0);
}
magma_free_cpu(tau); magma_free_cpu(work);
return det.real();
}
示例3: magma_z_isinf
/** @return true if either real(x) or imag(x) is INF. */
inline bool magma_z_isinf( magmaDoubleComplex x )
{
#ifdef COMPLEX
return isinf( MAGMA_Z_REAL( x )) ||
isinf( MAGMA_Z_IMAG( x ));
#else
return isinf( x );
#endif
}
示例4: magma_zmake_hpd
void magma_zmake_hpd( magma_int_t N, magmaDoubleComplex* A, magma_int_t lda )
{
magma_int_t i, j;
for( i=0; i<N; ++i ) {
A(i,i) = MAGMA_Z_MAKE( MAGMA_Z_REAL( A(i,i) ) + N, 0. );
for( j=0; j<i; ++j ) {
A(j,i) = MAGMA_Z_CNJG( A(i,j) );
}
}
}
示例5: magma_zrowentries
extern "C" magma_int_t
magma_zrowentries(
magma_z_matrix *A,
magma_queue_t queue )
{
magma_int_t info = 0;
magma_index_t *length=NULL;
magma_index_t i,j, maxrowlength=0;
// check whether matrix on CPU
if ( A->memory_location == Magma_CPU ) {
// CSR
if ( A->storage_type == Magma_CSR ) {
CHECK( magma_index_malloc_cpu( &length, A->num_rows));
for( i=0; i<A->num_rows; i++ ) {
length[i] = A->row[i+1]-A->row[i];
if (length[i] > maxrowlength)
maxrowlength = length[i];
}
A->max_nnz_row = maxrowlength;
}
// Dense
else if ( A->storage_type == Magma_DENSE ) {
CHECK( magma_index_malloc_cpu( &length, A->num_rows));
for( i=0; i<A->num_rows; i++ ) {
length[i] = 0;
for( j=0; j<A->num_cols; j++ ) {
if ( MAGMA_Z_REAL( A->val[i*A->num_cols + j] ) != 0. )
length[i]++;
}
if (length[i] > maxrowlength)
maxrowlength = length[i];
}
A->max_nnz_row = maxrowlength;
}
} // end CPU case
else {
printf("error: matrix not on CPU.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
cleanup:
magma_free( length );
return info;
}
示例6: eigen_magma
void eigen_magma(Tensor_core<complex<double>,2>& A, Tensor_core<double,1>& W, char JOBZ, char UPLO)
{
if( A.rank(0) != A.rank(1) ) {cout<<"Input for eigen is not square matrix!"<<endl; exit(1);}
if( A.rank(0) != W.rank(0) ) {cout<<"Input size of W is not consistent with A!"<<endl; exit(1);}
magma_vec_t jobz = magma_vec_const(JOBZ); magma_uplo_t uplo = magma_uplo_const(UPLO);
magma_int_t N=A.rank(0); magma_int_t info;
magmaDoubleComplex work_test[1]; double rwork_test[1]; magma_int_t iwork_test[1];
magma_int_t lwork=-1; magma_int_t lrwork=-1; magma_int_t liwork=-1;
magma_zheevd( jobz, uplo, N, (magmaDoubleComplex* ) A.data(), N, W.data(),
work_test, lwork, rwork_test, lrwork, iwork_test, liwork, &info );
lwork=lround( MAGMA_Z_REAL(work_test[0]) ); lrwork=lround(rwork_test[0]); liwork=iwork_test[0];
magmaDoubleComplex* work; double* rwork; magma_int_t* iwork;
magma_zmalloc_cpu(&work, lwork); magma_dmalloc_cpu(&rwork, lrwork); magma_imalloc_cpu(&iwork, liwork);
magma_zheevd( jobz, uplo, N, (magmaDoubleComplex* ) A.data(), N, W.data(),
work, lwork, rwork, lrwork, iwork, liwork, &info );
magma_free_cpu(work); magma_free_cpu(rwork); magma_free_cpu(iwork);
if(info!=0) {cout<<"Zheevd failed: info= "<< info<<endl; exit(1);}
}
示例7: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zhesv
*/
int main( int argc, char** argv)
{
TESTING_INIT();
magmaDoubleComplex *h_A, *h_B, *h_X, *work, temp;
real_Double_t gflops, gpu_perf, gpu_time = 0.0, cpu_perf=0, cpu_time=0;
double error, error_lapack = 0.0;
magma_int_t *ipiv;
magma_int_t N, n2, lda, ldb, sizeB, lwork, info;
magma_int_t status = 0, ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
opts.parse_opts( argc, argv );
double tol = opts.tolerance * lapackf77_dlamch("E");
printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
printf("%%========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
ldb = N;
lda = N;
n2 = lda*N;
sizeB = ldb*opts.nrhs;
gflops = ( FLOPS_ZPOTRF( N ) + FLOPS_ZPOTRS( N, opts.nrhs ) ) / 1e9;
TESTING_MALLOC_CPU( ipiv, magma_int_t, N );
TESTING_MALLOC_PIN( h_A, magmaDoubleComplex, n2 );
TESTING_MALLOC_PIN( h_B, magmaDoubleComplex, sizeB );
TESTING_MALLOC_PIN( h_X, magmaDoubleComplex, sizeB );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
lwork = -1;
lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, &temp, &lwork, &info);
lwork = (int)MAGMA_Z_REAL(temp);
TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork );
init_matrix( N, N, h_A, lda );
lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
cpu_time = magma_wtime();
lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_zhesv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
error_lapack = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
TESTING_FREE_CPU( work );
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
init_matrix( N, N, h_A, lda );
lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
magma_setdevice(0);
gpu_time = magma_wtime();
magma_zhesv( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0) {
printf("magma_zhesv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) N, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) N, (int) N, gpu_perf, gpu_time );
}
if ( opts.check == 0 ) {
printf(" --- \n");
} else {
error = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
printf(" %8.2e %s", error, (error < tol ? "ok" : "failed"));
if (opts.lapack)
printf(" (lapack rel.res. = %8.2e)", error_lapack);
printf("\n");
//.........这里部分代码省略.........
示例8: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zgeqrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double error, work[1];
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *h_A, *h_R, *tau, *dtau, *h_work, tmp[1];
magmaDoubleComplex *d_A;
double *dwork;
magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
printf(" M N CPU GFlop/s (ms) GPU GFlop/s (ms) ||R||_F / ||A||_F\n");
printf("=======================================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[i];
N = opts.nsize[i];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
gflops = FLOPS_ZGEQRF( M, N ) / 1e9;
lwork = -1;
lapackf77_zgeqrf(&M, &N, h_A, &M, tau, tmp, &lwork, &info);
lwork = (magma_int_t)MAGMA_Z_REAL( tmp[0] );
TESTING_MALLOC( tau, magmaDoubleComplex, min_mn );
TESTING_MALLOC( h_A, magmaDoubleComplex, n2 );
TESTING_HOSTALLOC( h_R, magmaDoubleComplex, n2 );
TESTING_DEVALLOC( d_A, magmaDoubleComplex, ldda*N );
TESTING_DEVALLOC( dtau, magmaDoubleComplex, min_mn );
TESTING_DEVALLOC(dwork, double, min_mn );
TESTING_MALLOC( h_work, magmaDoubleComplex, lwork );
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
magma_zsetmatrix( M, N, h_R, lda, d_A, ldda );
// warmup
magma_zgeqr2_gpu( M, N, d_A, ldda, dtau, dwork, &info );
magma_zsetmatrix( M, N, h_R, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_sync_wtime( 0 );
magma_zgeqr2_gpu( M, N, d_A, ldda, dtau, dwork, &info );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_zgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.lapack ) {
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_zgeqrf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
magma_zgetmatrix( M, N, d_A, ldda, h_R, M );
error = lapackf77_zlange("f", &M, &N, h_A, &lda, work);
blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_zlange("f", &M, &N, h_R, &lda, work) / error;
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
(int) M, (int) N, cpu_perf, 1000.*cpu_time, gpu_perf, 1000.*gpu_time, error );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f) --- \n",
(int) M, (int) N, gpu_perf, 1000.*gpu_time );
}
TESTING_FREE( tau );
TESTING_FREE( h_A );
TESTING_FREE( h_work );
TESTING_HOSTFREE( h_R );
//.........这里部分代码省略.........
示例9: interval
//.........这里部分代码省略.........
lrwmin = n;
liwmin = 1;
}
// multiply by 1+eps (in Double!) to ensure length gets rounded up,
// if it cannot be exactly represented in floating point.
real_Double_t one_eps = 1. + lapackf77_dlamch("Epsilon");
work[0] = MAGMA_Z_MAKE( lwmin * one_eps, 0.);
rwork[0] = lrwmin * one_eps;
iwork[0] = liwmin;
if ((lwork < lwmin) && !lquery) {
*info = -14;
} else if ((lrwork < lrwmin) && ! lquery) {
*info = -16;
} else if ((liwork < liwmin) && ! lquery) {
*info = -18;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (n == 0) {
return *info;
}
if (n == 1) {
w[0] = MAGMA_Z_REAL(A[0]);
if (wantz) {
A[0] = MAGMA_Z_ONE;
}
return *info;
}
/* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
if (n <= 128) {
#ifdef ENABLE_DEBUG
printf("--------------------------------------------------------------\n");
printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
printf("--------------------------------------------------------------\n");
#endif
lapackf77_zheevd(jobz_, uplo_,
&n, A, &lda,
w, work, &lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
rwork, &lrwork,
#endif
iwork, &liwork, info);
return *info;
}
/* Get machine constants. */
safmin = lapackf77_dlamch("Safe minimum");
eps = lapackf77_dlamch("Precision");
smlnum = safmin / eps;
bignum = 1. / smlnum;
rmin = magma_dsqrt(smlnum);
rmax = magma_dsqrt(bignum);
/* Scale matrix to allowable range, if necessary. */
anrm = lapackf77_zlanhe("M", uplo_, &n, A, &lda, rwork);
iscale = 0;
示例10: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zgeqrs
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double gpu_error, cpu_error, matnorm, work[1];
magmaDoubleComplex c_one = MAGMA_Z_ONE;
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *h_A, *h_A2, *h_B, *h_X, *h_R, *tau, *h_work, tmp[1];
magmaDoubleComplex *d_A, *d_B;
magma_int_t M, N, n2, nrhs, lda, ldb, ldda, lddb, min_mn, max_mn, nb, info;
magma_int_t lworkgpu, lhwork, lhwork2;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_int_t status = 0;
double tol = opts.tolerance * lapackf77_dlamch("E");
nrhs = opts.nrhs;
printf(" ||b-Ax|| / (N||A||)\n");
printf(" M N NRHS CPU GFlop/s (sec) GPU GFlop/s (sec) CPU GPU \n");
printf("===============================================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[i];
N = opts.nsize[i];
if ( M < N ) {
printf( "skipping M=%d, N=%d because M < N is not yet supported.\n", (int) M, (int) N );
continue;
}
min_mn = min(M, N);
max_mn = max(M, N);
lda = M;
ldb = max_mn;
n2 = lda*N;
ldda = ((M+31)/32)*32;
lddb = ((max_mn+31)/32)*32;
nb = magma_get_zgeqrf_nb(M);
gflops = (FLOPS_ZGEQRF( M, N ) + FLOPS_ZGEQRS( M, N, nrhs )) / 1e9;
// query for workspace size
lworkgpu = (M - N + nb)*(nrhs + nb) + nrhs*nb;
lhwork = -1;
lapackf77_zgeqrf(&M, &N, h_A, &M, tau, tmp, &lhwork, &info);
lhwork2 = (magma_int_t) MAGMA_Z_REAL( tmp[0] );
lhwork = -1;
lapackf77_zunmqr( MagmaLeftStr, MagmaConjTransStr,
&M, &nrhs, &min_mn, h_A, &lda, tau,
h_X, &ldb, tmp, &lhwork, &info);
lhwork = (magma_int_t) MAGMA_Z_REAL( tmp[0] );
lhwork = max( max( lhwork, lhwork2 ), lworkgpu );
TESTING_MALLOC( tau, magmaDoubleComplex, min_mn );
TESTING_MALLOC( h_A, magmaDoubleComplex, lda*N );
TESTING_MALLOC( h_A2, magmaDoubleComplex, lda*N );
TESTING_MALLOC( h_B, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC( h_X, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC( h_R, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC( h_work, magmaDoubleComplex, lhwork );
TESTING_DEVALLOC( d_A, magmaDoubleComplex, ldda*N );
TESTING_DEVALLOC( d_B, magmaDoubleComplex, lddb*nrhs );
/* Initialize the matrices */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_A2, &lda );
// make random RHS
n2 = M*nrhs;
lapackf77_zlarnv( &ione, ISEED, &n2, h_B );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
// make consistent RHS
//n2 = N*nrhs;
//lapackf77_zlarnv( &ione, ISEED, &n2, h_X );
//blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
// &c_one, h_A, &lda,
// h_X, &ldb,
// &c_zero, h_B, &ldb );
//lapackf77_zlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
gpu_time = magma_wtime();
magma_zgels3_gpu( MagmaNoTrans, M, N, nrhs, d_A, ldda,
d_B, lddb, h_work, lworkgpu, &info);
gpu_time = magma_wtime() - gpu_time;
//.........这里部分代码省略.........
示例11: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zcgeqrsv
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time, gpu_perfd, gpu_perfs;
double error, gpu_error, cpu_error, Anorm, work[1];
magmaDoubleComplex c_one = MAGMA_Z_ONE;
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *h_A, *h_A2, *h_B, *h_X, *h_R;
magmaDoubleComplex_ptr d_A, d_B, d_X, d_T;
magmaFloatComplex *d_SA, *d_SB;
magmaDoubleComplex *h_workd, *tau, tmp[1];
magmaFloatComplex *h_works;
magma_int_t lda, ldb, lhwork, lworkgpu;
magma_int_t ldda, lddb, lddx;
magma_int_t M, N, nrhs, qrsv_iters, info, size, min_mn, max_mn, nb;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
printf("Epsilon(double): %8.6e\n"
"Epsilon(single): %8.6e\n\n",
lapackf77_dlamch("Epsilon"), lapackf77_slamch("Epsilon") );
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
double tol = opts.tolerance * lapackf77_dlamch("E");
nrhs = opts.nrhs;
printf(" CPU Gflop/s GPU Gflop/s |b-Ax|| / (N||A||) ||dx-x||/(N||A||)\n");
printf(" M N NRHS double double single mixed Iter CPU GPU \n");
printf("=============================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
if ( M < N ) {
printf( "%5d %5d %5d skipping because M < N is not yet supported.\n", (int) M, (int) N, (int) nrhs );
continue;
}
min_mn = min(M, N);
max_mn = max(M, N);
lda = M;
ldb = max_mn;
ldda = ((M+31)/32) * 32;
lddb = ((max_mn+31)/32)*32;
lddx = ((N+31)/32) * 32;
nb = max( magma_get_zgeqrf_nb( M ), magma_get_cgeqrf_nb( M ) );
gflops = (FLOPS_ZGEQRF( M, N ) + FLOPS_ZGEQRS( M, N, nrhs )) / 1e9;
lworkgpu = (M - N + nb)*(nrhs + nb) + nrhs*nb;
// query for workspace size
lhwork = -1;
lapackf77_zgels( MagmaNoTransStr, &M, &N, &nrhs,
NULL, &lda, NULL, &ldb, tmp, &lhwork, &info );
lhwork = (magma_int_t) MAGMA_Z_REAL( tmp[0] );
lhwork = max( lhwork, lworkgpu );
TESTING_MALLOC_CPU( tau, magmaDoubleComplex, min_mn );
TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*N );
TESTING_MALLOC_CPU( h_A2, magmaDoubleComplex, lda*N );
TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_X, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_R, magmaDoubleComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_workd, magmaDoubleComplex, lhwork );
h_works = (magmaFloatComplex*)h_workd;
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, lddb*nrhs );
TESTING_MALLOC_DEV( d_X, magmaDoubleComplex, lddx*nrhs );
TESTING_MALLOC_DEV( d_T, magmaDoubleComplex, ( 2*min_mn + (N+31)/32*32 )*nb );
/* Initialize the matrices */
size = lda*N;
lapackf77_zlarnv( &ione, ISEED, &size, h_A );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_A2, &lda );
// make random RHS
size = ldb*nrhs;
lapackf77_zlarnv( &ione, ISEED, &size, h_B );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
//=====================================================================
// Mixed Precision Iterative Refinement - GPU
//=====================================================================
gpu_time = magma_wtime();
magma_zcgeqrsv_gpu( M, N, nrhs,
d_A, ldda, d_B, lddb,
d_X, lddx, &qrsv_iters, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
//.........这里部分代码省略.........
示例12: dimension
//.........这里部分代码省略.........
magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
/* Reduce the upper triangle of A.
Columns 1:kk are handled by the unblocked method. */
kk = n - (n - nx + nb - 1) / nb * nb;
for (i = n - nb; i >= kk; i -= nb) {
/* Reduce columns i:i+nb-1 to tridiagonal form and form the
matrix W which is needed to update the unreduced part of
the matrix */
/* Get the current panel (no need for the 1st iteration) */
if (i != n-nb)
magma_zgetmatrix( i+nb, nb, dA(0, i), ldda, A(0, i), lda );
#ifdef FAST_HEMV
magma_zlatrd2( uplo, i+nb, nb, A(0, 0), lda, e, tau,
work, ldw, dA(0, 0), ldda, dwork, lddw,
dwork2, ldwork2 );
#else
magma_zlatrd( uplo, i+nb, nb, A(0, 0), lda, e, tau,
work, ldw, dA(0, 0), ldda, dwork, lddw );
#endif
/* Update the unreduced submatrix A(0:i-2,0:i-2), using an
update of the form: A := A - V*W' - W*V' */
magma_zsetmatrix( i + nb, nb, work, ldw, dwork, lddw );
magma_zher2k( uplo, MagmaNoTrans, i, nb, c_neg_one,
dA(0, i), ldda, dwork, lddw,
d_one, dA(0, 0), ldda );
/* Copy superdiagonal elements back into A, and diagonal
elements into D */
for (j = i; j < i+nb; ++j) {
*A(j-1,j) = MAGMA_Z_MAKE( e[j - 1], 0 );
d[j] = MAGMA_Z_REAL( *A(j, j) );
}
}
magma_zgetmatrix( kk, kk, dA(0, 0), ldda, A(0, 0), lda );
/* Use CPU code to reduce the last or only block */
lapackf77_zhetrd( uplo_, &kk, A(0, 0), &lda, d, e, tau, work, &lwork, &iinfo );
}
else {
/* Copy the matrix to the GPU */
if (1 <= n-nx)
magma_zsetmatrix( n, n, A(0,0), lda, dA(0,0), ldda );
/* Reduce the lower triangle of A */
for (i = 0; i < n-nx; i += nb) {
/* Reduce columns i:i+nb-1 to tridiagonal form and form the
matrix W which is needed to update the unreduced part of
the matrix */
/* Get the current panel (no need for the 1st iteration) */
if (i != 0)
magma_zgetmatrix( n-i, nb, dA(i, i), ldda, A(i, i), lda );
#ifdef FAST_HEMV
magma_zlatrd2( uplo, n-i, nb, A(i, i), lda, &e[i], &tau[i],
work, ldw, dA(i, i), ldda, dwork, lddw,
dwork2, ldwork2 );
#else
magma_zlatrd( uplo, n-i, nb, A(i, i), lda, &e[i], &tau[i],
work, ldw, dA(i, i), ldda, dwork, lddw );
#endif
/* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
an update of the form: A := A - V*W' - W*V' */
magma_zsetmatrix( n-i, nb, work, ldw, dwork, lddw );
magma_zher2k( MagmaLower, MagmaNoTrans, n-i-nb, nb, c_neg_one,
dA(i+nb, i), ldda, &dwork[nb], lddw,
d_one, dA(i+nb, i+nb), ldda );
/* Copy subdiagonal elements back into A, and diagonal
elements into D */
for (j = i; j < i+nb; ++j) {
*A(j+1,j) = MAGMA_Z_MAKE( e[j], 0 );
d[j] = MAGMA_Z_REAL( *A(j, j) );
}
}
/* Use CPU code to reduce the last or only block */
if (1 <= n-nx)
magma_zgetmatrix( n-i, n-i, dA(i, i), ldda, A(i, i), lda );
i_n = n-i;
lapackf77_zhetrd( uplo_, &i_n, A(i, i), &lda, &d[i], &e[i],
&tau[i], work, &lwork, &iinfo );
}
magma_free( dA );
work[0] = MAGMA_Z_MAKE( lwkopt, 0 );
return *info;
} /* magma_zhetrd */
示例13: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zgeqlf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double error, work[1];
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *h_A, *h_R, *tau, *h_work, tmp[1];
magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn, nb;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
double tol = 2. * opts.tolerance * lapackf77_dlamch("E");
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||A||_F\n");
printf("=======================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
nb = magma_get_zgeqrf_nb(M);
gflops = FLOPS_ZGEQLF( M, N ) / 1e9;
// query for workspace size
lwork = -1;
lapackf77_zgeqlf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info);
lwork = (magma_int_t)MAGMA_Z_REAL( tmp[0] );
lwork = max( lwork, N*nb );
lwork = max( lwork, 2*nb*nb);
TESTING_MALLOC_CPU( tau, magmaDoubleComplex, min_mn );
TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 );
TESTING_MALLOC_CPU( h_work, magmaDoubleComplex, lwork );
TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 );
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_zgeqlf( M, N, h_R, lda, tau, h_work, lwork, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_zgeqlf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_zgeqlf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapack_zgeqlf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
error = lapackf77_zlange("f", &M, &N, h_A, &lda, work);
blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_zlange("f", &M, &N, h_R, &lda, work) / error;
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed"));
status += ! (error < tol);
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_work );
TESTING_FREE_PIN( h_R );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例14: magma_zgeev
//.........这里部分代码省略.........
}
/* If INFO > 0 from ZHSEQR, then quit */
if (*info > 0) {
goto L50;
}
if (wantvl || wantvr) {
/* Compute left and/or right eigenvectors
(CWorkspace: need 2*N)
(RWorkspace: need 2*N) */
irwork = ibal + n;
lapackf77_ztrevc(side, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl,
&vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &rwork[irwork],
&ierr);
}
if (wantvl) {
/* Undo balancing of left eigenvectors
(CWorkspace: none)
(RWorkspace: need N) */
lapackf77_zgebak("B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
&vl[vl_offset], &ldvl, &ierr);
/* Normalize left eigenvectors and make largest component real */
for (i__ = 1; i__ <= n; ++i__) {
scl = 1. / cblas_dznrm2(n, &vl[i__ * vl_dim1 + 1], 1);
cblas_zdscal(n, scl, &vl[i__ * vl_dim1 + 1], 1);
i__2 = n;
for (k = 1; k <= i__2; ++k)
{
i__3 = k + i__ * vl_dim1;
/* Computing 2nd power */
d__1 = MAGMA_Z_REAL(vl[i__3]);
/* Computing 2nd power */
d__2 = MAGMA_Z_IMAG(vl[k + i__ * vl_dim1]);
rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2;
}
/* Comment:
Fortran BLAS does not have to add 1
C BLAS must add one to cblas_idamax */
k = cblas_idamax(n, &rwork[irwork], 1)+1;
z__2 = MAGMA_Z_CNJG(vl[k + i__ * vl_dim1]);
d__1 = magma_dsqrt(rwork[irwork + k - 1]);
MAGMA_Z_DSCALE(z__1, z__2, d__1);
tmp = z__1;
cblas_zscal(n, CBLAS_SADDR(tmp), &vl[i__ * vl_dim1 + 1], 1);
i__2 = k + i__ * vl_dim1;
i__3 = k + i__ * vl_dim1;
d__1 = MAGMA_Z_REAL(vl[i__3]);
MAGMA_Z_SET2REAL(z__1, d__1);
vl[i__2] = z__1;
}
}
if (wantvr) {
/* Undo balancing of right eigenvectors
(CWorkspace: none)
(RWorkspace: need N) */
lapackf77_zgebak("B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
&vr[vr_offset], &ldvr, &ierr);
/* Normalize right eigenvectors and make largest component real */
for (i__ = 1; i__ <= n; ++i__) {
scl = 1. / cblas_dznrm2(n, &vr[i__ * vr_dim1 + 1], 1);
cblas_zdscal(n, scl, &vr[i__ * vr_dim1 + 1], 1);
示例15: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zgetrf
*/
int main( int argc, char** argv)
{
real_Double_t gflops, gpu_perf, cpu_perf, gpu_time, cpu_time, error;
magmaDoubleComplex *h_A, *h_R;
magmaDoubleComplex_ptr d_A, dwork;
magma_int_t N = 0, n2, lda, ldda;
magma_int_t size[10] = { 1024, 2048, 3072, 4032, 5184, 5600, 5600, 5600, 5600, 5600 };
magma_int_t ntest = 10;
magma_int_t i, info;
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0, 0, 0, 1};
magmaDoubleComplex *work;
magmaDoubleComplex tmp;
double rwork[1];
magma_int_t *ipiv;
magma_int_t lwork, ldwork;
double A_norm, R_norm;
if (argc != 1){
for(i = 1; i<argc; i++){
if (strcmp("-N", argv[i])==0)
N = atoi(argv[++i]);
}
if (N>0) size[0] = size[ntest-1] = N;
else exit(1);
}
else {
printf("\nUsage: \n");
printf(" testing_zgetri_gpu -N %d\n\n", 1024);
}
/* query for Lapack workspace size */
N = size[ntest-1];
lda = N;
work = &tmp;
lwork = -1;
lapackf77_zgetri( &N, h_A, &lda, ipiv, work, &lwork, &info );
if (info != 0)
printf("lapackf77_zgetri returned error %d\n", (int) info);
lwork = int( MAGMA_Z_REAL( *work ));
/* query for Magma workspace size */
ldwork = N * magma_get_zgetri_nb( N );
/* Initialize */
magma_queue_t queue;
magma_device_t device[ MagmaMaxGPUs ];
int num = 0;
magma_err_t err;
magma_init();
err = magma_get_devices( device, MagmaMaxGPUs, &num );
if ( err != 0 || num < 1 ) {
fprintf( stderr, "magma_get_devices failed: %d\n", err );
exit(-1);
}
err = magma_queue_create( device[0], &queue );
if ( err != 0 ) {
fprintf( stderr, "magma_queue_create failed: %d\n", err );
exit(-1);
}
/* Allocate memory */
n2 = N * N;
ldda = ((N+31)/32) * 32;
TESTING_MALLOC_CPU( ipiv, magma_int_t, N );
TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork );
TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 );
TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 );
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
TESTING_MALLOC_DEV( dwork, magmaDoubleComplex, ldwork );
printf(" N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F\n");
printf("========================================================\n");
for( i=0; i < ntest; i++ ){
N = size[i];
lda = N;
n2 = lda*N;
gflops = FLOPS_ZGETRI( (double)N ) / 1e9;
ldda = ((N+31)/32)*32;
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
A_norm = lapackf77_zlange( "f", &N, &N, h_A, &lda, rwork );
/* Factor the matrix. Both MAGMA and LAPACK will use this factor. */
magma_zsetmatrix( N, N, h_A, 0, lda, d_A, 0, ldda, queue );
magma_zgetrf_gpu( N, N, d_A, 0, ldda, ipiv, &info, queue );
magma_zgetmatrix( N, N, d_A, 0, ldda, h_A, 0, lda, queue );
// check for exact singularity
//h_A[ 10 + 10*lda ] = MAGMA_Z_MAKE( 0.0, 0.0 );
//magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
//.........这里部分代码省略.........