本文整理汇总了C++中TESTING_FREE_PIN函数的典型用法代码示例。如果您正苦于以下问题:C++ TESTING_FREE_PIN函数的具体用法?C++ TESTING_FREE_PIN怎么用?C++ TESTING_FREE_PIN使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了TESTING_FREE_PIN函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
// copy K by N matrix L
lapackf77_zlaset( "Full", &min_mn, &N, &c_zero, &c_zero, L, &ldl );
if ( M >= N ) {
// for M=5, N=3: A = [ V V V ] <= V full block (M-N by K)
// K=N [ V V V ]
// [ ----- ]
// [ L V V ] <= V triangle (N by K, copying diagonal too)
// [ L L V ] <= L triangle (K by N)
// [ L L L ]
magma_int_t M_N = M - N;
lapackf77_zlacpy( "Full", &M_N, &min_mn, h_R, &lda, Q, &ldq );
lapackf77_zlacpy( "Upper", &N, &min_mn, &h_R[M_N], &lda, &Q[M_N], &ldq );
lapackf77_zlacpy( "Lower", &min_mn, &N, &h_R[M_N], &lda, L, &ldl );
}
else {
// for M=3, N=5: A = [ L L | L V V ] <= V triangle (K by K)
// K=M [ L L | L L V ] <= L triangle (K by M)
// [ L L | L L L ]
// ^^^============= L full block (K by N-M)
magma_int_t N_M = N - M;
lapackf77_zlacpy( "Upper", &M, &min_mn, &h_R[N_M*lda], &lda, Q, &ldq );
lapackf77_zlacpy( "Full", &min_mn, &N_M, h_R, &lda, L, &ldl );
lapackf77_zlacpy( "Lower", &min_mn, &M, &h_R[N_M*lda], &lda, &L[N_M*ldl], &ldl );
}
// generate M by K matrix Q, where K = min(M,N)
lapackf77_zungql( &M, &min_mn, &min_mn, Q, &ldq, tau, h_work, &lwork, &info );
assert( info == 0 );
// error = || L - Q^H*A || / (N * ||A||)
blasf77_zgemm( "Conj", "NoTrans", &min_mn, &N, &M,
&c_neg_one, Q, &ldq, h_A, &lda, &c_one, L, &ldl );
Anorm = lapackf77_zlange( "1", &M, &N, h_A, &lda, work );
error = lapackf77_zlange( "1", &min_mn, &N, L, &ldl, work );
if ( N > 0 && Anorm > 0 )
error /= (N*Anorm);
// set L = I (K by K identity), then L = I - Q^H*Q
// error = || I - Q^H*Q || / N
lapackf77_zlaset( "Upper", &min_mn, &min_mn, &c_zero, &c_one, L, &ldl );
blasf77_zherk( "Upper", "Conj", &min_mn, &M, &d_neg_one, Q, &ldq, &d_one, L, &ldl );
error2 = lapackf77_zlanhe( "1", "Upper", &min_mn, L, &ldl, work );
if ( N > 0 )
error2 /= N;
TESTING_FREE_CPU( Q ); Q = NULL;
TESTING_FREE_CPU( L ); L = NULL;
TESTING_FREE_CPU( work ); work = NULL;
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_zgeqlf( &M, &N, h_A, &lda, tau, h_work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapack_zgeqlf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Print performance and error.
=================================================================== */
printf("%5d %5d ", (int) M, (int) N );
if ( opts.lapack ) {
printf( "%7.2f (%7.2f)", cpu_perf, cpu_time );
}
else {
printf(" --- ( --- )" );
}
printf( " %7.2f (%7.2f) ", gpu_perf, gpu_time );
if ( opts.check ) {
bool okay = (error < tol && error2 < tol);
status += ! okay;
printf( "%11.2e %11.2e %s\n", error, error2, (okay ? "ok" : "failed") );
}
else {
printf( " ---\n" );
}
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_work );
TESTING_FREE_PIN( h_R );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例2: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgetrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
float error;
magmaFloatComplex *h_A, *h_R;
magmaFloatComplex *d_A;
magma_int_t *ipiv;
magma_int_t M, N, n2, lda, ldda, info, min_mn;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
printf(" M N CPU GFlop/s (ms) GPU GFlop/s (ms) ||PA-LU||/(||A||*N)\n");
printf("=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
gflops = FLOPS_CGETRF( M, N ) / 1e9;
if ( N > 512 ) {
printf( "%5d %5d skipping because cgetf2 does not support N > 512\n", (int) M, (int) N );
continue;
}
TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn );
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_cgetrf(&M, &N, h_A, &lda, ipiv, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_cgetf2_gpu( M, N, d_A, ldda, ipiv, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgetf2_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) M, (int) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000. );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) M, (int) N, gpu_perf, gpu_time*1000. );
}
if ( opts.check ) {
magma_cgetmatrix( M, N, d_A, ldda, h_A, lda );
error = get_LU_error( M, N, h_R, lda, h_A, ipiv );
printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
}
else {
printf(" --- \n");
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( h_A );
TESTING_FREE_PIN( h_R );
TESTING_FREE_DEV( d_A );
fflush( stdout );
}
//.........这里部分代码省略.........
示例3: main
//.........这里部分代码省略.........
// solve linear system
magma_sgeqrs3_gpu( M, N, 1,
d_A, ldda, tau, dT,
d_B, M, hwork, lwork2, &info );
if (info != 0) {
printf("magma_sgeqrs3 returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
TESTING_FREE_CPU( hwork );
}
#endif
else {
printf( "Unknown version %d\n", (int) opts.version );
return -1;
}
magma_sgetvector( N, d_B, 1, x, 1 );
// compute r = Ax - b, saved in b
blasf77_sgemv( "Notrans", &M, &N, &c_one, h_A, &lda, x, &ione, &c_neg_one, b, &ione );
// compute residual |Ax - b| / (max(m,n)*|A|*|x|)
float norm_x, norm_A, norm_r, work[1];
norm_A = lapackf77_slange( "F", &M, &N, h_A, &lda, work );
norm_r = lapackf77_slange( "F", &M, &ione, b, &M, work );
norm_x = lapackf77_slange( "F", &N, &ione, x, &N, work );
TESTING_FREE_CPU( x );
TESTING_FREE_CPU( b );
TESTING_FREE_DEV( d_B );
error = norm_r / (max(M,N) * norm_A * norm_x);
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_sgeqrf( &M, &N, h_A, &lda, tau, h_work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_sgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
}
/* =====================================================================
Print performance and error.
=================================================================== */
printf("%5d %5d ", (int) M, (int) N );
if ( opts.lapack ) {
printf( "%7.2f (%7.2f)", cpu_perf, cpu_time );
}
else {
printf(" --- ( --- )" );
}
printf( " %7.2f (%7.2f) ", gpu_perf, gpu_time );
if ( opts.check == 1 ) {
bool okay = (error < tol && error2 < tol);
status += ! okay;
printf( "%11.2e %11.2e %s\n", error, error2, (okay ? "ok" : "failed") );
}
else if ( opts.check == 2 ) {
if ( M >= N ) {
bool okay = (error < tol);
status += ! okay;
printf( "%10.2e %s\n", error, (okay ? "ok" : "failed") );
}
else {
printf( "(error check only for M >= N)\n" );
}
}
else {
printf( " ---\n" );
}
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_work );
TESTING_FREE_PIN( h_R );
TESTING_FREE_DEV( d_A );
if ( opts.version == 1 || opts.version == 3 ) {
TESTING_FREE_DEV( dT );
}
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
return status;
}
示例4: main
//.........这里部分代码省略.........
=================================================================== */
gpu_time = magma_wtime();
magma_dsyevd( opts.jobz, opts.uplo,
N, h_R, lda, w1,
h_work, lwork,
iwork, liwork,
opts.queue, &info );
gpu_time = magma_wtime() - gpu_time;
if (info != 0)
printf("magma_dsyevd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.check ) {
/* =====================================================================
Check the results following the LAPACK's [zcds]drvst routine.
A is factored as A = U S U' and the following 3 tests computed:
(1) | A - U S U' | / ( |A| N )
(2) | I - U'U | / ( N )
(3) | S(with U) - S(w/o U) | / | S |
=================================================================== */
double temp1, temp2;
// tau=NULL is unused since itype=1
lapackf77_dsyt21( &ione, lapack_uplo_const(opts.uplo), &N, &izero,
h_A, &lda,
w1, h_work,
h_R, &lda,
h_R, &lda,
NULL, h_work, &result[0] );
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
magma_dsyevd( MagmaNoVec, opts.uplo,
N, h_R, lda, w2,
h_work, lwork,
iwork, liwork,
opts.queue, &info );
if (info != 0)
printf("magma_dsyevd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
temp1 = temp2 = 0;
for( int j=0; j<N; j++ ) {
temp1 = max(temp1, fabs(w1[j]));
temp1 = max(temp1, fabs(w2[j]));
temp2 = max(temp2, fabs(w1[j]-w2[j]));
}
result[2] = temp2 / (((double)N)*temp1);
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_dsyevd( lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo),
&N, h_A, &lda, w2,
h_work, &lwork,
iwork, &liwork,
&info );
cpu_time = magma_wtime() - cpu_time;
if (info != 0)
printf("lapackf77_dsyevd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
printf("%5d %7.2f %7.2f\n",
(int) N, cpu_time, gpu_time);
}
else {
printf("%5d --- %7.2f\n",
(int) N, gpu_time);
}
/* =====================================================================
Print execution time
=================================================================== */
if ( opts.check ) {
printf("Testing the factorization A = U S U' for correctness:\n");
printf("(1) | A - U S U' | / (|A| N) = %8.2e %s\n", result[0]*eps, (result[0]*eps < tol ? "ok" : "failed") );
printf("(2) | I - U'U | / N = %8.2e %s\n", result[1]*eps, (result[1]*eps < tol ? "ok" : "failed") );
printf("(3) | S(w/ U) - S(w/o U) | / |S| = %8.2e %s\n\n", result[2] , (result[2] < tolulp ? "ok" : "failed") );
status += ! (result[0]*eps < tol && result[1]*eps < tol && result[2] < tolulp);
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( w1 );
TESTING_FREE_CPU( w2 );
TESTING_FREE_CPU( iwork );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( h_work );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例5: main
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cgehrd
   For every size in opts, runs magma_cgehrd on a random N-by-N matrix and,
   when opts.check is set, calls lapackf77_chst01 on the original matrix,
   the reduced matrix and the Q produced by magma_cunghr.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    magmaFloatComplex *h_A, *h_R, *h_Q, *h_work, *tau, *twork;
    magmaFloatComplex_ptr dT;
    #if defined(PRECISION_z) || defined(PRECISION_c)
    float *rwork;
    #endif
    float eps, result[2];
    magma_int_t N, n2, lda, nb, lwork, ltwork, info;
    magma_int_t ione = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    eps = lapackf77_slamch( "E" );

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    float tol = opts.tolerance * lapackf77_slamch("E");

    printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) |A-QHQ'|/N|A| |I-QQ'|/N\n");
    printf("=========================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda = N;
            n2 = lda*N;
            nb = magma_get_cgehrd_nb(N);
            /* We suppose the magma nb is bigger than lapack nb */
            lwork = N*nb;
            gflops = FLOPS_CGEHRD( N ) / 1e9;

            TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
            TESTING_MALLOC_CPU( tau, magmaFloatComplex, N );
            TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 );
            TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork );
            TESTING_MALLOC_DEV( dT, magmaFloatComplex, nb*N );

            /* Initialize the matrices: h_A keeps the original, h_R is the working copy */
            lapackf77_clarnv( &ione, ISEED, &n2, h_A );
            lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_cgehrd( N, ione, N, h_R, lda, tau, h_work, lwork, dT, &info);
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_cgehrd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

            /* =====================================================================
               Check the factorization
               =================================================================== */
            if ( opts.check ) {
                /* Size of the scratch array handed to chst01 below. */
                ltwork = 2*(N*N);
                TESTING_MALLOC_PIN( h_Q, magmaFloatComplex, lda*N );
                TESTING_MALLOC_CPU( twork, magmaFloatComplex, ltwork );
                #if defined(PRECISION_z) || defined(PRECISION_c)
                TESTING_MALLOC_CPU( rwork, float, N );
                #endif

                /* Keep the full cgehrd output in h_Q before h_R is modified. */
                lapackf77_clacpy(MagmaUpperLowerStr, &N, &N, h_R, &lda, h_Q, &lda);

                /* Zero every entry of h_R below the first subdiagonal. */
                for( int j = 0; j < N-1; ++j )
                    for( int i = j+2; i < N; ++i )
                        h_R[i+j*lda] = MAGMA_C_ZERO;

                /* NOTE(review): presumably forms the explicit Q in h_Q from the
                   reflectors and tau -- confirm against the MAGMA docs. */
                magma_cunghr(N, ione, N, h_Q, lda, tau, dT, nb, &info);
                if (info != 0) {
                    printf("magma_cunghr returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                    exit(1);
                }

                /* The workspace length is passed by address (&ltwork). */
                #if defined(PRECISION_z) || defined(PRECISION_c)
                lapackf77_chst01(&N, &ione, &N,
                                 h_A, &lda, h_R, &lda,
                                 h_Q, &lda, twork, &ltwork, rwork, result);
                #else
                lapackf77_chst01(&N, &ione, &N,
                                 h_A, &lda, h_R, &lda,
                                 h_Q, &lda, twork, &ltwork, result);
                #endif

                TESTING_FREE_PIN( h_Q );
                TESTING_FREE_CPU( twork );
                #if defined(PRECISION_z) || defined(PRECISION_c)
                TESTING_FREE_CPU( rwork );
                #endif
            }
//.........这里部分代码省略.........
示例6: main
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zgeqlf
   For every (M, N) size in opts, runs magma_zgeqlf and lapackf77_zgeqlf on
   the same random matrix, prints timings and the relative difference
   ||R_magma - R_lapack||_F / ||R_lapack||_F, and compares it against tol.
   Returns the number of size/iteration cases whose error was not below tol.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double error, Anorm, work[1];
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *h_A, *h_R, *tau, *h_work, tmp[1];
    magma_int_t M, N, n2, lda, lwork, info, min_mn, nb;
    magma_int_t ione = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    double tol = 2. * opts.tolerance * lapackf77_dlamch("E");

    printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||A||_F\n");
    printf("=======================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M = opts.msize[itest];
            N = opts.nsize[itest];
            min_mn = min(M, N);
            lda = M;
            n2 = lda*N;
            nb = magma_get_zgeqrf_nb(M);
            gflops = FLOPS_ZGEQLF( M, N ) / 1e9;

            // query for workspace size (lwork = -1), then take the largest of
            // LAPACK's answer, N*nb and 2*nb*nb so one buffer serves both runs
            lwork = -1;
            lapackf77_zgeqlf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info);
            lwork = (magma_int_t)MAGMA_Z_REAL( tmp[0] );
            lwork = max( lwork, N*nb );
            lwork = max( lwork, 2*nb*nb);

            TESTING_MALLOC_CPU( tau, magmaDoubleComplex, min_mn );
            TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 );
            TESTING_MALLOC_CPU( h_work, magmaDoubleComplex, lwork );
            TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 );

            /* Initialize the matrix: h_A for LAPACK, h_R (a copy) for MAGMA */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            lapackf77_zlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_zgeqlf( M, N, h_R, lda, tau, h_work, lwork, &info);
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_zgeqlf returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            lapackf77_zgeqlf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info);
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gflops / cpu_time;
            if (info != 0)
                printf("lapack_zgeqlf returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

            /* =====================================================================
               Check the result compared to LAPACK
               =================================================================== */
            Anorm = lapackf77_zlange("f", &M, &N, h_A, &lda, work);
            // h_R := h_R - h_A
            blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
            error = lapackf77_zlange("f", &M, &N, h_R, &lda, work);
            // Only normalize by a non-zero reference norm; dividing by zero
            // would produce NaN/Inf and report a spurious failure.
            if ( Anorm > 0 )
                error /= Anorm;

            printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
                   (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                   error, (error < tol ? "ok" : "failed"));
            status += ! (error < tol);

            TESTING_FREE_CPU( tau );
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_work );
            TESTING_FREE_PIN( h_R );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
示例7: main
//.........这里部分代码省略.........
opts.parse_opts( argc, argv );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
float tol = opts.tolerance * lapackf77_slamch("E");
printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) );
printf("%% itype N CPU time (sec) GPU time (sec) |R| \n");
printf("%%=======================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
ldda = magma_roundup( lda, opts.align );
n2 = N*lda;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*N );
TESTING_MALLOC_CPU( h_B, magmaFloatComplex, lda*N );
TESTING_MALLOC_PIN( h_R, magmaFloatComplex, lda*N );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( d_B, magmaFloatComplex, ldda*N );
/* ====================================================================
Initialize the matrix
=================================================================== */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clarnv( &ione, ISEED, &n2, h_B );
magma_cmake_hermitian( N, h_A, lda );
magma_cmake_hpd( N, h_B, lda );
magma_cpotrf( opts.uplo, N, h_B, lda, &info );
if (info != 0) {
printf("magma_cpotrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
magma_csetmatrix( N, N, h_A, lda, d_A, ldda );
magma_csetmatrix( N, N, h_B, lda, d_B, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_chegst_gpu( opts.itype, opts.uplo, N, d_A, ldda, d_B, ldda, &info );
gpu_time = magma_wtime() - gpu_time;
if (info != 0) {
printf("magma_chegst_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_chegst( &opts.itype, lapack_uplo_const(opts.uplo),
&N, h_A, &lda, h_B, &lda, &info );
cpu_time = magma_wtime() - cpu_time;
if (info != 0) {
printf("lapackf77_chegst returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
magma_cgetmatrix( N, N, d_A, ldda, h_R, lda );
blasf77_caxpy( &n2, &c_neg_one, h_A, &ione, h_R, &ione );
Anorm = safe_lapackf77_clanhe("f", lapack_uplo_const(opts.uplo), &N, h_A, &lda, work );
error = safe_lapackf77_clanhe("f", lapack_uplo_const(opts.uplo), &N, h_R, &lda, work )
/ Anorm;
bool okay = (error < tol);
status += ! okay;
printf("%3d %5d %7.2f %7.2f %8.2e %s\n",
(int) opts.itype, (int) N, cpu_time, gpu_time,
error, (okay ? "ok" : "failed"));
}
else {
printf("%3d %5d --- %7.2f\n",
(int) opts.itype, (int) N, gpu_time );
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_B );
TESTING_FREE_PIN( h_R );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
return status;
}
示例8: main
//.........这里部分代码省略.........
nk = min(nb, N-j);
magma_sgetmatrix( j+nk, nk,
d_lA[k], j/(nb*tot_subs)*nb*ldda, ldda,
&h_R[j*lda], lda, queues[2*(k%num_gpus)]);
}
} else {
for (j=0; j<N; j+=nb) {
nk = min(nb, N-j);
for (magma_int_t kk = 0; kk<tot_subs; kk++) {
k = ((j+kk*nb)/nb)%tot_subs;
magma_int_t mk = 0;
mk = 0;
for (magma_int_t ii=j+kk*nb; ii<N; ii+=nb*tot_subs) {
mk += min(nb, N-ii);
}
if (mk > 0 && nk > 0) {
magma_sgetmatrix( mk, nk,
d_lA[k], j*ldda+(j+kk*nb)/(nb*tot_subs)*nb, ldda,
h_P, lda,
queues[2*(k%num_gpus)]);
}
mk = 0;
for (magma_int_t ii=j+kk*nb; ii<N; ii+=nb*tot_subs) {
magma_int_t mii = min(nb, N-ii);
lapackf77_slacpy( MagmaFullStr, &mii, &nk, &h_P[mk], &lda, &h_R[ii+j*lda], &lda );
mk += mii;
}
}
}
/*for (j=0; j<N; j+=nb) {
k = (j/nb)%tot_subs;
nk = min(nb, N-j);
magma_sgetmatrix( nk, j+nk,
d_lA[k], (j/(nb*tot_subs)*nb), ldda,
&h_R[j], lda, queues[2*(k%num_gpus)] );
}*/
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if (check == 1) {
float work[1], matnorm, diffnorm;
float *h_A;
TESTING_MALLOC_PIN( h_A, float, n2 );
init_matrix( N, h_A, lda );
cpu_time = magma_wtime();
if (uplo == MagmaLower) {
lapackf77_spotrf( MagmaLowerStr, &N, h_A, &lda, &info );
} else {
lapackf77_spotrf( MagmaUpperStr, &N, h_A, &lda, &info );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf( "lapackf77_spotrf had error %d.\n", info );
/* =====================================================================
Check the result compared to LAPACK
|R_magma - R_lapack| / |R_lapack|
=================================================================== */
matnorm = lapackf77_slange("f", &N, &N, h_A, &lda, work);
blasf77_saxpy(&n2, &mz_one, h_A, &ione, h_R, &ione);
diffnorm = lapackf77_slange("f", &N, &N, h_R, &lda, work);
printf( "%5d %6.2f (%6.2f) %6.2f (%6.2f) %e\n",
N, cpu_perf, cpu_time, gpu_perf, gpu_time, diffnorm / matnorm );
TESTING_FREE_PIN( h_A );
} else {
printf( "%5d - - (- -) %6.2f (%6.2f) - -\n",
N, gpu_perf, gpu_time );
}
// free memory
#ifdef USE_PINNED_CLMEMORY
for (k=0; k<num_gpus; k++) {
clEnqueueUnmapMemObject(queues[2*k], buffer1, h_R, 0, NULL, NULL);
clEnqueueUnmapMemObject(queues[2*k], buffer2, h_P, 0, NULL, NULL);
}
clReleaseMemObject(buffer1);
clReleaseMemObject(buffer2);
#else
TESTING_FREE_PIN( h_P );
TESTING_FREE_PIN( h_R );
#endif
for (j=0; j<tot_subs; j++) {
TESTING_FREE_DEV( d_lA[j] );
}
if (flag != 0)
break;
}
/* clean up */
for (i=0; i<num_gpus; i++) {
magma_queue_destroy( queues[2*i] );
magma_queue_destroy( queues[2*i+1] );
}
magma_finalize();
return 0;
}
示例9: main
//.........这里部分代码省略.........
magma_dgeev(MagmaNoVec, MagmaVec,
N, h_R, lda, w2, w2i,
&DUM, 1, LRE, lda,
h_work, lwork, &info, queue);
if (info != 0) {
result[0] = ulpinv;
info = abs(info);
printf("Info = %d fo case N, V\n", (int) info);
}
// Do test 5 again
result[4] = 1;
for (j = 0; j < N; ++j)
if ( w1[j] != w2[j] || w1i[j] != w2i[j] )
result[4] = 0;
//if (result[4] == 0) printf("test 5 failed with N V\n");
// Do test 6
result[5] = 1;
for (j = 0; j < N; ++j)
for (jj = 0; jj < N; ++jj)
if ( VR[j+jj*lda] != LRE[j+jj*lda] )
result[5] = 0;
// Compute eigenvalues and left eigenvectors, and test them
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
magma_dgeev(MagmaVec, MagmaNoVec,
N, h_R, lda, w2, w2i,
LRE, lda, &DUM, 1,
h_work, lwork, &info, queue);
if (info != 0) {
result[0] = ulpinv;
info = abs(info);
printf("Info = %d fo case V, N\n", (int) info);
}
// Do test 5 again
result[4] = 1;
for (j = 0; j < N; ++j)
if ( w1[j] != w2[j] || w1i[j] != w2i[j] )
result[4] = 0;
//if (result[4] == 0) printf("test 5 failed with V N\n");
// Do test 7
result[6] = 1;
for (j = 0; j < N; ++j)
for (jj = 0; jj < N; ++jj)
if ( VL[j+jj*lda] != LRE[j+jj*lda] )
result[6] = 0;
printf("Test 1: | A * VR - VR * W | / ( n |A| ) = %e\n", result[0]);
printf("Test 2: | A'* VL - VL * W'| / ( n |A| ) = %e\n", result[1]);
printf("Test 3: | |VR(i)| - 1 | = %e\n", result[2]);
printf("Test 4: | |VL(i)| - 1 | = %e\n", result[3]);
printf("Test 5: W (full) == W (partial) = %f\n", result[4]);
printf("Test 6: VR (full) == VR (partial) = %f\n", result[5]);
printf("Test 7: VL (full) == VL (partial) = %f\n", result[6]);
//====================================================================
matnorm = lapackf77_dlange("f", &N, &ione, w1, &N, h_work);
blasf77_daxpy(&N, &c_neg_one, w1, &ione, w2, &ione);
result[7] = lapackf77_dlange("f", &N, &ione, w2, &N, h_work) / matnorm;
printf("%5d %6.2f %6.2f %e\n",
(int) N, cpu_time, gpu_time, result[7]);
TESTING_FREE_PIN( LRE );
}
else
{
printf("%5d %6.2f %6.2f\n",
(int) N, cpu_time, gpu_time);
}
if (argc != 1)
break;
}
/* Memory clean up */
TESTING_FREE_CPU( w1 );
TESTING_FREE_CPU( w2 );
TESTING_FREE_CPU( w1i );
TESTING_FREE_CPU( w2i );
TESTING_FREE_CPU( h_A );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( VL );
TESTING_FREE_PIN( VR );
TESTING_FREE_PIN( h_work );
/* Shutdown */
magma_queue_destroy( queue );
magma_finalize();
}
示例10: main
//.........这里部分代码省略.........
/* =====================================================================
* LAPACK-style claswpx (extended for row- and col-major) (1 matrix)
*/
#ifdef HAVE_CUBLAS
/* Row Major */
init_matrix( N, N, h_A1, lda, 0 );
magma_csetmatrix( N, N, h_A1, lda, d_A1, ldda );
time = magma_sync_wtime( queue );
magmablas_claswpx( N, d_A1, ldda, 1, 1, nb, ipiv, 1);
time = magma_sync_wtime( queue ) - time;
row_perf5 = gbytes / time;
for( j=0; j < nb; j++) {
if ( j != (ipiv[j]-1)) {
blasf77_cswap( &N, h_A1+lda*j, &ione, h_A1+lda*(ipiv[j]-1), &ione);
}
}
magma_cgetmatrix( N, N, d_A1, ldda, h_R1, lda );
check += diff_matrix( N, N, h_A1, lda, h_R1, lda )*shift;
shift *= 2;
/* Col Major */
init_matrix( N, N, h_A1, lda, 0 );
magma_csetmatrix( N, N, h_A1, lda, d_A1, ldda );
time = magma_sync_wtime( queue );
magmablas_claswpx( N, d_A1, 1, ldda, 1, nb, ipiv, 1);
time = magma_sync_wtime( queue ) - time;
col_perf5 = gbytes / time;
#endif
/* LAPACK swap on CPU for comparison */
time = magma_wtime();
lapackf77_claswp( &N, h_A1, &lda, &ione, &nb, ipiv, &ione);
time = magma_wtime() - time;
cpu_perf = gbytes / time;
#ifdef HAVE_CUBLAS
magma_cgetmatrix( N, N, d_A1, ldda, h_R1, lda );
check += diff_matrix( N, N, h_A1, lda, h_R1, lda )*shift;
shift *= 2;
#endif
/* =====================================================================
* Copy matrix.
*/
time = magma_sync_wtime( queue );
magma_ccopymatrix( N, nb, d_A1, ldda, d_A2, ldda );
time = magma_sync_wtime( queue ) - time;
// copy reads 1 matrix and writes 1 matrix, so has half gbytes of swap
col_perf6 = 0.5 * gbytes / time;
time = magma_sync_wtime( queue );
magma_ccopymatrix( nb, N, d_A1, ldda, d_A2, ldda );
time = magma_sync_wtime( queue ) - time;
// copy reads 1 matrix and writes 1 matrix, so has half gbytes of swap
row_perf6 = 0.5 * gbytes / time;
printf("%5d %3d %6.2f%c/ %6.2f%c %6.2f%c/ %6.2f%c %6.2f%c/ %6.2f%c %6.2f%c %6.2f%c %6.2f%c/ %6.2f%c %6.2f / %6.2f %6.2f %10s\n",
(int) N, (int) nb,
row_perf0, ((check & 0x001) != 0 ? '*' : ' '),
col_perf0, ((check & 0x002) != 0 ? '*' : ' '),
row_perf1, ((check & 0x004) != 0 ? '*' : ' '),
col_perf1, ((check & 0x008) != 0 ? '*' : ' '),
row_perf2, ((check & 0x010) != 0 ? '*' : ' '),
col_perf2, ((check & 0x020) != 0 ? '*' : ' '),
row_perf4, ((check & 0x040) != 0 ? '*' : ' '),
row_perf7, ((check & 0x080) != 0 ? '*' : ' '),
row_perf5, ((check & 0x100) != 0 ? '*' : ' '),
col_perf5, ((check & 0x200) != 0 ? '*' : ' '),
row_perf6,
col_perf6,
cpu_perf,
(check == 0 ? "ok" : "* failed") );
status += ! (check == 0);
TESTING_FREE_PIN( h_A1 );
TESTING_FREE_PIN( h_A2 );
TESTING_FREE_PIN( h_R1 );
TESTING_FREE_PIN( h_R2 );
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( ipiv2 );
TESTING_FREE_DEV( d_ipiv );
TESTING_FREE_DEV( d_A1 );
TESTING_FREE_DEV( d_A2 );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例11: main
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cpotrf
   For every N in opts, runs magma_cpotrf on a random matrix prepared with
   magma_cmake_hpd and, when opts.lapack is set (opts.check implies it),
   also runs lapackf77_cpotrf and checks
   ||R_magma - R_lapack||_F / ||R_lapack||_F against tol.
   Returns the number of cases whose error was not below tol.
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    magmaFloatComplex *h_A, *h_R;
    magma_int_t N, n2, lda, info;
    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
    magma_int_t ione = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    float work[1], error, Anorm;
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    opts.lapack |= opts.check; // check (-c) implies lapack (-l)

    float tol = opts.tolerance * lapackf77_slamch("E");

    printf("ngpu = %d, uplo = %s\n", (int) opts.ngpu, lapack_uplo_const(opts.uplo) );
    printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R_magma - R_lapack||_F / ||R_lapack||_F\n");
    printf("========================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda = N;
            n2 = lda*N;
            gflops = FLOPS_CPOTRF( N ) / 1e9;

            TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
            TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 );

            /* Initialize the matrix: h_A for LAPACK, h_R (a copy) for MAGMA */
            lapackf77_clarnv( &ione, ISEED, &n2, h_A );
            magma_cmake_hpd( N, h_A, lda );
            lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_cpotrf( opts.uplo, N, h_R, lda, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_cpotrf returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

            if ( opts.lapack ) {
                /* =====================================================================
                   Performs operation using LAPACK
                   =================================================================== */
                cpu_time = magma_wtime();
                lapackf77_cpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_cpotrf returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));

                /* =====================================================================
                   Check the result compared to LAPACK
                   =================================================================== */
                Anorm = lapackf77_clange("f", &N, &N, h_A, &lda, work);
                // h_R := h_R - h_A
                blasf77_caxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
                error = lapackf77_clange("f", &N, &N, h_R, &lda, work);
                // Only normalize by a non-zero reference norm; dividing by zero
                // would produce NaN/Inf and report a spurious failure.
                if ( Anorm > 0 )
                    error /= Anorm;

                printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
                       (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                       error, (error < tol ? "ok" : "failed") );
                status += ! (error < tol);
            }
            else {
                printf("%5d --- ( --- ) %7.2f (%7.2f) --- \n",
                       (int) N, gpu_perf, gpu_time );
            }

            TESTING_FREE_CPU( h_A );
            TESTING_FREE_PIN( h_R );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
示例12: main
//.........这里部分代码省略.........
// Fragment of a test driver: for every size in opts.nsize (repeated opts.niter times)
// it solves A*X = B with lapackf77_zhesv (only when opts.lapack is set) and with
// magma_zhesv, prints timing and Gflop/s, and counts failed residual checks in status.
// NOTE(review): variable declarations and option parsing are in the omitted part above.
// Pass/fail threshold: the user tolerance scaled by lapackf77_dlamch("E")
// (presumably machine epsilon -- confirm against the LAPACK dlamch documentation).
double tol = opts.tolerance * lapackf77_dlamch("E");
printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
printf("%%========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
// Square N-by-N problem with opts.nrhs right-hand sides.
N = opts.nsize[itest];
ldb = N;
lda = N;
n2 = lda*N;
sizeB = ldb*opts.nrhs;
// Flop model used for the reported rate: the ZPOTRF + ZPOTRS counts, in Gflop.
gflops = ( FLOPS_ZPOTRF( N ) + FLOPS_ZPOTRS( N, opts.nrhs ) ) / 1e9;
// Per-iteration buffers; all four are released at the end of this iteration.
TESTING_MALLOC_CPU( ipiv, magma_int_t, N );
TESTING_MALLOC_PIN( h_A, magmaDoubleComplex, n2 );
TESTING_MALLOC_PIN( h_B, magmaDoubleComplex, sizeB );
TESTING_MALLOC_PIN( h_X, magmaDoubleComplex, sizeB );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
// Workspace query: called with lwork = -1, then the size is read back from temp.
lwork = -1;
lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, &temp, &lwork, &info);
lwork = (int)MAGMA_Z_REAL(temp);
TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork );
// Generate A and a random B; X starts as a copy of B and is handed to the solver.
init_matrix( N, N, h_A, lda );
lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
// Only the solver call itself is timed.
cpu_time = magma_wtime();
lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_zhesv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
// error_lapack is only assigned here and only read below under opts.lapack.
error_lapack = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
TESTING_FREE_CPU( work );
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// Regenerate A, B and X before the MAGMA run.
// NOTE(review): presumably needed because the LAPACK branch overwrote h_A/h_X -- confirm.
init_matrix( N, N, h_A, lda );
lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
magma_setdevice(0);
// Only the solver call itself is timed.
gpu_time = magma_wtime();
magma_zhesv( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0) {
printf("magma_zhesv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Check the factorization
=================================================================== */
// First half of the result row: sizes and timings (CPU columns only with opts.lapack).
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) N, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) N, (int) N, gpu_perf, gpu_time );
}
// Second half of the row: residual of the MAGMA solution, or a placeholder when
// checking is disabled.
if ( opts.check == 0 ) {
printf(" --- \n");
} else {
error = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
printf(" %8.2e %s", error, (error < tol ? "ok" : "failed"));
if (opts.lapack)
printf(" (lapack rel.res. = %8.2e)", error_lapack);
printf("\n");
// Written as !(error < tol) so that a NaN error also counts as a failure.
status += ! (error < tol);
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_PIN( h_X );
TESTING_FREE_PIN( h_B );
TESTING_FREE_PIN( h_A );
fflush( stdout );
}
// Blank line between size groups when each size is run more than once.
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
// Number of failed checks; zero means every checked run passed.
return status;
}
示例13: main
//.........这里部分代码省略.........
// Fragment of a test driver's result-check path: solves with the QR factors held on the
// device (magma_cgeqrs_gpu or magma_cgeqrs3_gpu, chosen by opts.version), copies the
// solution back and computes |Ax - b| / (N*|A|*|x|) on the host.
// NOTE(review): the enclosing condition, the factorization and all declarations are in
// the omitted part above; the message in the else branch below suggests the check is
// only run for M >= N.
// Upload the right-hand side b (length M) into the device vector d_B.
magma_csetvector( M, b, 1, d_B, 0, 1, opts.queue );
if ( opts.version == 1 ) {
// allocate hwork
// Workspace query: called with lwork = -1, then the size is read back from tmp[0].
magma_cgeqrs_gpu( M, N, 1,
d_A, 0, ldda, tau, dT, 0,
d_B, 0, M, tmp, -1, opts.queue, &info );
lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] );
TESTING_MALLOC_CPU( hwork, magmaFloatComplex, lwork );
// solve linear system
magma_cgeqrs_gpu( M, N, 1,
d_A, 0, ldda, tau, dT, 0,
d_B, 0, M, hwork, lwork, opts.queue, &info );
if (info != 0)
printf("magma_cgeqrs returned error %d: %s.\n",
(int) info, magma_strerror( info ));
TESTING_FREE_CPU( hwork );
}
// The version-3 branch exists only when HAVE_CUBLAS is defined; otherwise version 3
// falls into the "Unknown version" branch below.
#ifdef HAVE_CUBLAS
else if ( opts.version == 3 ) {
// allocate hwork
// Same query-then-solve sequence as above, using the cgeqrs3 variant.
magma_cgeqrs3_gpu( M, N, 1,
d_A, 0, ldda, tau, dT, 0,
d_B, 0, M, tmp, -1, opts.queue, &info );
lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] );
TESTING_MALLOC_CPU( hwork, magmaFloatComplex, lwork );
// solve linear system
magma_cgeqrs3_gpu( M, N, 1,
d_A, 0, ldda, tau, dT, 0,
d_B, 0, M, hwork, lwork, opts.queue, &info );
if (info != 0)
printf("magma_cgeqrs3 returned error %d: %s.\n",
(int) info, magma_strerror( info ));
TESTING_FREE_CPU( hwork );
}
#endif
else {
// Any other version cannot be checked here; abort the whole test program.
printf( "Unknown version %d\n", opts.version );
exit(1);
}
// Download the first N entries of d_B into x as the solution.
magma_cgetvector( N, d_B, 0, 1, x, 1, opts.queue );
// compute r = Ax - b, saved in b
// NOTE(review): h_A is regenerated from ISEED2, presumably reproducing the matrix that
// was factored -- confirm ISEED2 matches the seed state used for the original fill.
lapackf77_clarnv( &ione, ISEED2, &n2, h_A );
blasf77_cgemv( "Notrans", &M, &N, &c_one, h_A, &lda, x, &ione, &c_neg_one, b, &ione );
// compute residual |Ax - b| / (n*|A|*|x|)
float norm_x, norm_A, norm_r, work[1];
norm_A = lapackf77_clange( "F", &M, &N, h_A, &lda, work );
norm_r = lapackf77_clange( "F", &M, &ione, b, &M, work );
norm_x = lapackf77_clange( "F", &N, &ione, x, &N, work );
// x, b and d_B are only needed for the check, so they are released right here.
TESTING_FREE_CPU( x );
TESTING_FREE_CPU( b );
TESTING_FREE_DEV( d_B );
error = norm_r / (N * norm_A * norm_x);
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e",
(int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error );
} else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e",
(int) M, (int) N, gpu_perf, gpu_time, error );
}
printf(" %s\n", (error < tol ? "ok" : "failed"));
// Written as !(error < tol) so that a NaN error also counts as a failure.
status += ! (error < tol);
}
else {
// No residual check in this branch: print timings only, with a hint when checking
// was requested but skipped.
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) ---",
(int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
} else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f) ---",
(int) M, (int) N, gpu_perf, gpu_time);
}
printf("%s\n", (opts.check != 0 ? " (error check only for M >= N)" : ""));
}
// Per-iteration cleanup of the buffers allocated in the omitted part.
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_work );
TESTING_FREE_PIN( h_R );
TESTING_FREE_DEV( d_A );
// NOTE(review): dT is presumably only allocated when opts.version != 2 -- confirm
// against the allocation in the omitted part.
if ( opts.version != 2 )
TESTING_FREE_DEV( dT );
fflush( stdout );
}
// Blank line between size groups when each size is run more than once.
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
// Number of failed checks; zero means every checked run passed.
return status;
}
示例14: main
//.........这里部分代码省略.........
// Fragment of a test driver comparing magma_dgels_gpu with lapackf77_dgels: for each
// problem size it runs both solvers on the same random data, prints Gflop/s and the
// Frobenius norm of b - A*x scaled by min(M,N)*||A||, then frees every buffer.
// NOTE(review): declarations, allocations and argument parsing are in the omitted part above.
// One host workspace, large enough for whichever call needs the most.
lhwork = max( max( l1, l2 ), lworkgpu );
TESTING_MALLOC_PIN( hwork, double, lhwork );
printf("\n");
printf(" ||b-Ax|| / (N||A||)\n");
printf(" M N CPU GFlop/s GPU GFlop/s CPU GPU \n");
printf("============================================================\n");
for(i=0; i<7; i++){
// Without command-line arguments, sweep the built-in square sizes; otherwise keep
// the M and N set earlier and run exactly once (see the break at the loop end).
if (argc == 1){
M = N = size[i];
}
min_mn= min(M, N);
ldb = lda = M;
n2 = lda*N;
// Device leading dimension: M rounded up to a multiple of 32.
ldda = ((M+31)/32)*32;
gflops = (FLOPS_GEQRF( (double)M, (double)N )
+ FLOPS_GEQRS( (double)M, (double)N, (double)nrhs )) / 1e9;
/* Initialize the matrices */
// h_A2 and h_R keep copies of A and B that the residual computations below read.
lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
lapackf77_dlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_A2, &lda );
n2 = M*nrhs;
lapackf77_dlarnv( &ione, ISEED, &n2, h_B );
lapackf77_dlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
/* Warm up to measure the performance */
magma_dsetmatrix( M, N, h_A, 0, lda, d_A, 0, ldda, queue );
magma_dsetmatrix( M, nrhs, h_B, 0, ldb, d_B, 0, lddb, queue );
magma_dgels_gpu( MagmaNoTrans, M, N, nrhs, d_A, 0, ldda,
d_B, 0, lddb, hwork, lworkgpu, &info, queue);
// Upload fresh copies of A and B again before the timed run.
magma_dsetmatrix( M, N, h_A, 0, lda, d_A, 0, ldda, queue );
magma_dsetmatrix( M, nrhs, h_B, 0, ldb, d_B, 0, lddb, queue );
gpu_time = magma_wtime();
magma_dgels_gpu( MagmaNoTrans, M, N, nrhs, d_A, 0, ldda,
d_B, 0, lddb, hwork, lworkgpu, &info, queue);
gpu_time = magma_wtime() - gpu_time;
// info is a magma_int_t; cast so the argument always matches %d, even when
// magma_int_t is wider than int.
if (info < 0)
printf("Argument %d of magma_dgels had an illegal value.\n", (int) -info);
gpu_perf = gflops / gpu_time;
// Get the solution in h_X
magma_dgetmatrix( N, nrhs, d_B, 0, lddb, h_X, 0, ldb, queue );
// compute the residual
// h_R := B - A*X for the MAGMA solution (h_R held a copy of B).
blasf77_dgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A, &lda,
h_X, &ldb,
&c_one, h_R, &ldb);
// ||A||_F, taken before lapackf77_dgels below is handed h_A.
matnorm = lapackf77_dlange("f", &M, &N, h_A, &lda, work);
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
lapackf77_dlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_X, &ldb );
cpu_time = magma_wtime();
lapackf77_dgels( MagmaNoTransStr, &M, &N, &nrhs,
h_A, &lda, h_X, &ldb, hwork, &lhwork, &info);
cpu_time = magma_wtime()-cpu_time;
cpu_perf = gflops / cpu_time;
if (info < 0)
printf("Argument %d of lapackf77_dgels had an illegal value.\n", (int) -info);
// h_B := B - A*X for the LAPACK solution, using the preserved copy h_A2 of A.
blasf77_dgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A2, &lda,
h_X, &ldb,
&c_one, h_B, &ldb);
// M and N are magma_int_t as well; cast them for the %5d conversions.
printf("%5d %5d %6.1f %6.1f %7.2e %7.2e\n",
(int) M, (int) N, cpu_perf, gpu_perf,
lapackf77_dlange("f", &M, &nrhs, h_B, &M, work)/(min_mn*matnorm),
lapackf77_dlange("f", &M, &nrhs, h_R, &M, work)/(min_mn*matnorm) );
// A user-specified size is run only once.
if (argc != 1)
break;
}
/* Memory clean up */
TESTING_FREE_PIN( tau );
TESTING_FREE_PIN( h_A );
TESTING_FREE_PIN( h_A2 );
TESTING_FREE_PIN( h_B );
TESTING_FREE_PIN( h_X );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( hwork );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
/* Shutdown */
magma_queue_destroy( queue );
magma_finalize();
}
示例15: main
//.........这里部分代码省略.........
=================================================================== */
// Fragment of a multi-GPU test driver: compares the device result in dB[dev] with a
// host blasf77_zhemm reference, prints performance and error, then releases the
// per-iteration buffers and finally destroys the queues and events.
// NOTE(review): setup, the timed device run and all declarations are in the omitted
// part above.
if ( opts.check ) {
// store ||A||*||X||
// The norms are taken over the msize-by-msize sub-block of hA that starts at
// (offset, offset), and over the first msize rows of hX.
errorbis = lapackf77_zlange("fro", &msize, &msize, hA+offset*lda+offset, &lda, work );
errorbis *= lapackf77_zlange("fro", &msize, &N, hX, &lda, work );
//printf( "A =" ); magma_zprint( M, M, hA, lda );
//printf( "X =" ); magma_zprint( M, N, hX, lda );
//printf( "B =" ); magma_zprint( M, N, hB, lda );
// Host reference: hB := calpha*A*X + cbeta*hB, reading the lower triangle of the sub-block.
cpu_time = magma_wtime();
blasf77_zhemm( "Left", "Lower", &msize, &N,
&calpha, hA+offset*lda+offset, &lda,
hX, &lda,
&cbeta, hB, &lda );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
/*
trace_file = fopen("AJETE/C", "w");
for (int j = 0; j < N; j++)
for (int i = 0; i < siz; i++)
fprintf(trace_file, "%10d%10d%40.30e\n", i+1, j+1, hB[j*lda+i]);
fclose(trace_file);
*/
// Each device's copy of the result is checked separately; only the first device's
// row carries the sizes and timings, later rows print the error alone.
magma_int_t firstprint=0;
for(magma_int_t dev=0; dev < opts.ngpu; ++dev) {
magma_setdevice( dev );
magma_zgetmatrix( M, N, dB[dev], ldda, hR, lda );
// compute relative error ||R||/||A||*||X||, where R := B_magma - B_lapack = R - B
// The subtraction runs over all lda*N entries of hR, in place.
size = lda*N;
blasf77_zaxpy( &size, &c_neg_one, hB, &ione, hR, &ione );
error = lapackf77_zlange("fro", &msize, &N, hR, &lda, work) / errorbis;
//printf( "R =" ); magma_zprint( M, N, hR, lda );
if (firstprint == 0) {
printf( "%5d %5d %5d %5d %7.1f (%7.4f) %7.1f (%7.4f) %7.1f (%7.4f) %8.2e %s\n",
(int) M, (int) N, (int) nb, (int) offset,
cpu_perf, cpu_time,
gpu_perf, gpu_time,
gpu_perf2, gpu_time2,
error, (error < tol ? "ok" : "failed") );
}
else {
printf( "%89s %8.2e %s\n", " ",
error, (error < tol ? "ok" : "failed") );
}
// Written as !(error < tol) so that a NaN error also counts as a failure.
status += ! (error < tol);
firstprint =1;
}
} else {
// No check requested: print the device timing only, with placeholders elsewhere.
printf( "%5d %5d %5d %5d --- ( --- ) %7.1f (%7.4f) --- ( --- ) ---\n",
(int) M, (int) N, (int) nb, (int) offset,
gpu_perf, gpu_time );
}
// Per-iteration cleanup: host buffers first, then each device's buffers with that
// device selected.
TESTING_FREE_CPU( hA );
TESTING_FREE_CPU( hX );
TESTING_FREE_CPU( hB );
TESTING_FREE_PIN( hR );
for( int d = 0; d < opts.ngpu; ++d ) {
magma_setdevice( d );
TESTING_FREE_DEV( dA[d] );
TESTING_FREE_DEV( dX[d] );
TESTING_FREE_DEV( dB[d] );
TESTING_FREE_DEV( dwork[d] );
TESTING_FREE_PIN( hwork[d] );
}
// hwork holds one entry per device plus one extra at index opts.ngpu, freed here.
TESTING_FREE_PIN( hwork[opts.ngpu] );
// NOTE(review): dA2 is presumably only allocated when opts.check is set -- confirm
// against the allocation in the omitted part.
if ( opts.check ) {
magma_setdevice( 0 );
TESTING_FREE_DEV( dA2 );
}
fflush( stdout );
}
// Blank line between size groups when each size is run more than once.
if ( opts.niter > 1 ) {
printf( "\n" );
}
} // offset
printf( "\n" );
}
// Final teardown: destroy every queue and both event sets on each device.
for( int d = 0; d < opts.ngpu; ++d ) {
magma_setdevice( d );
for( magma_int_t i = 0; i < nstream; ++i ) {
magma_queue_destroy( streams[d][i] );
}
for( magma_int_t i = 0; i < nbevents; ++i ) {
magma_event_destroy( redevents[d][i] );
magma_event_destroy( redevents2[d][i] );
}
}
TESTING_FINALIZE();
// Number of failed checks; zero means every checked run passed.
return status;
}