本文整理汇总了C++中TESTING_FREE_CPU函数的典型用法代码示例。如果您正苦于以下问题:C++ TESTING_FREE_CPU函数的具体用法?C++ TESTING_FREE_CPU怎么用?C++ TESTING_FREE_CPU使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了TESTING_FREE_CPU函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
{
M = N = K = i;
if ( M0 != 0 ) M = M0;
if ( N0 != 0 ) N = N0;
if ( K0 != 0 ) K = K0;
if( transA == MagmaNoTrans ) {
lda = Am = M;
An = K;
} else {
lda = Am = K;
An = M;
}
if( transB == MagmaNoTrans ) {
ldb = Bm = K;
Bn = N;
} else {
ldb = Bm = N;
Bn = K;
}
gflops = FLOPS( (float)M, (float)N, (float)K ) * 1e-9;
ldc = M;
ldda = ((lda+31)/32)*32;
lddb = ((ldb+31)/32)*32;
lddc = ((ldc+31)/32)*32;
szeA = lda * An;
szeB = ldb * Bn;
szeC = ldc * N;
/* Initialize the matrices */
lapackf77_clarnv( &ione, ISEED, &szeA, h_A );
lapackf77_clarnv( &ione, ISEED, &szeB, h_B );
lapackf77_clarnv( &ione, ISEED, &szeC, h_C );
/* =====================================================================
Performs operation using MAGMA-BLAS
=================================================================== */
magma_csetmatrix( Am, An, h_A, 0, lda, d_A, 0, ldda, queue );
magma_csetmatrix( Bm, Bn, h_B, 0, ldb, d_B, 0, lddb, queue );
magma_csetmatrix( M, N, h_C, 0, ldc, d_C, 0, lddc, queue );
magma_cgemm( transA, transB, M, N, K,
alpha, d_A, 0, ldda,
d_B, 0, lddb,
beta, d_C, 0, lddc, queue );
magma_csetmatrix( M, N, h_C, 0, ldc, d_C, 0, lddc, queue );
magma_queue_sync( queue );
gpu_time = magma_wtime();
magma_cgemm( transA, transB, M, N, K,
alpha, d_A, 0, ldda,
d_B, 0, lddb,
beta, d_C, 0, lddc, queue );
magma_queue_sync( queue);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
magma_cgetmatrix( M, N, d_C, 0, lddc, h_C2, 0, ldc, queue );
/* =====================================================================
Performs operation using CPU-BLAS
=================================================================== */
cpu_time = magma_wtime();
blasf77_cgemm( lapack_const(transA), lapack_const(transB),
&M, &N, &K,
&alpha, h_A, &lda,
h_B, &ldb,
&beta, h_C, &ldc );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
// |C_magma - C_lapack| / |C_lapack|
Cnorm = lapackf77_clange( "M", &M, &N, h_C, &ldc, work );
/* =====================================================================
Error Computation and Performance Comparison
=================================================================== */
blasf77_caxpy(&szeC, &mzone, h_C, &ione, h_C2, &ione);
error = lapackf77_clange("M", &M, &N, h_C2, &ldc, work)/Cnorm;
printf("%5d %5d %5d %8.2f (%6.2f) %6.2f (%6.2f) %e\n",
M, N, K, gpu_perf, gpu_time, cpu_perf, cpu_time, error);
}
/* Memory clean up */
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( h_C );
TESTING_FREE_CPU( h_C2 );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
TESTING_FREE_DEV( d_C );
magma_queue_destroy( queue );
magma_finalize();
}
示例2: main
//.........这里部分代码省略.........
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
K = opts.ksize[itest];
gflops = FLOPS_ZHERK(K, N) / 1e9;
if ( opts.transA == MagmaNoTrans ) {
lda = An = N;
Ak = K;
} else {
lda = An = K;
Ak = N;
}
ldc = N;
ldda = ((lda+31)/32)*32;
lddc = ((ldc+31)/32)*32;
sizeA = lda*Ak;
sizeC = ldc*N;
TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*Ak );
TESTING_MALLOC_CPU( h_C, magmaDoubleComplex, ldc*N );
TESTING_MALLOC_CPU( h_Ccublas, magmaDoubleComplex, ldc*N );
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*Ak );
TESTING_MALLOC_DEV( d_C, magmaDoubleComplex, lddc*N );
/* Initialize the matrices */
lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_zlarnv( &ione, ISEED, &sizeC, h_C );
/* =====================================================================
Performs operation using CUBLAS
=================================================================== */
magma_zsetmatrix( An, Ak, h_A, lda, d_A, ldda );
magma_zsetmatrix( N, N, h_C, ldc, d_C, lddc );
cublas_time = magma_sync_wtime( NULL );
cublasZherk( handle, cublas_uplo_const(opts.uplo), cublas_trans_const(opts.transA), N, K,
&alpha, d_A, ldda,
&beta, d_C, lddc );
cublas_time = magma_sync_wtime( NULL ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_zgetmatrix( N, N, d_C, lddc, h_Ccublas, ldc );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
blasf77_zherk( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), &N, &K,
&alpha, h_A, &lda,
&beta, h_C, &ldc );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
}
/* =====================================================================
Check the result
=================================================================== */
if ( opts.lapack ) {
// compute relative error for both magma & cublas, relative to lapack,
// |C_magma - C_lapack| / |C_lapack|
Cnorm = lapackf77_zlanhe("fro", lapack_uplo_const(opts.uplo), &N, h_C, &ldc, work);
blasf77_zaxpy( &sizeC, &c_neg_one, h_C, &ione, h_Ccublas, &ione );
cublas_error = lapackf77_zlanhe( "fro", lapack_uplo_const(opts.uplo), &N, h_Ccublas, &ldc, work ) / Cnorm;
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) N, (int) K,
cublas_perf, 1000.*cublas_time,
cpu_perf, 1000.*cpu_time,
cublas_error, (cublas_error < tol ? "ok" : "failed"));
status += ! (cublas_error < tol);
}
else {
printf("%5d %5d %7.2f (%7.2f) --- ( --- ) --- ---\n",
(int) N, (int) K,
cublas_perf, 1000.*cublas_time);
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_C );
TESTING_FREE_CPU( h_Ccublas );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_C );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例3: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgehrd
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
magmaFloatComplex *h_A, *h_R, *h_Q, *h_work, *tau, *twork, *dT;
#if defined(PRECISION_z) || defined(PRECISION_c)
float *rwork;
#endif
float eps, result[2];
magma_int_t N, n2, lda, nb, lwork, ltwork, info;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
eps = lapackf77_slamch( "E" );
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) |A-QHQ'|/N|A| |I-QQ'|/N\n");
printf("=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
n2 = lda*N;
nb = magma_get_cgehrd_nb(N);
/* We suppose the magma nb is bigger than lapack nb */
lwork = N*nb;
gflops = FLOPS_CGEHRD( N ) / 1e9;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
TESTING_MALLOC_CPU( tau, magmaFloatComplex, N );
TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 );
TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork );
TESTING_MALLOC_DEV( dT, magmaFloatComplex, nb*N );
/* Initialize the matrices */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_cgehrd( N, ione, N, h_R, lda, tau, h_work, lwork, dT, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgehrd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.check ) {
ltwork = 2*(N*N);
TESTING_MALLOC_PIN( h_Q, magmaFloatComplex, lda*N );
TESTING_MALLOC_CPU( twork, magmaFloatComplex, ltwork );
#if defined(PRECISION_z) || defined(PRECISION_c)
TESTING_MALLOC_CPU( rwork, float, N );
#endif
lapackf77_clacpy(MagmaUpperLowerStr, &N, &N, h_R, &lda, h_Q, &lda);
for( int j = 0; j < N-1; ++j )
for( int i = j+2; i < N; ++i )
h_R[i+j*lda] = MAGMA_C_ZERO;
magma_cunghr(N, ione, N, h_Q, lda, tau, dT, nb, &info);
if (info != 0) {
printf("magma_cunghr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
exit(1);
}
#if defined(PRECISION_z) || defined(PRECISION_c)
lapackf77_chst01(&N, &ione, &N,
h_A, &lda, h_R, &lda,
h_Q, &lda, twork, &ltwork, rwork, result);
#else
lapackf77_chst01(&N, &ione, &N,
h_A, &lda, h_R, &lda,
h_Q, &lda, twork, &ltwork, result);
#endif
TESTING_FREE_PIN( h_Q );
TESTING_FREE_CPU( twork );
#if defined(PRECISION_z) || defined(PRECISION_c)
TESTING_FREE_CPU( rwork );
#endif
}
/* =====================================================================
//.........这里部分代码省略.........
示例4: main
//.........这里部分代码省略.........
TESTING_MALLOC_CPU( h_xcublas, magmaDoubleComplex, N );
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
TESTING_MALLOC_DEV( d_x, magmaDoubleComplex, N );
/* Initialize the matrices */
/* Factor A into LU to get well-conditioned triangular matrix.
* Copy L to U, since L seems okay when used with non-unit diagonal
* (i.e., from U), while U fails when used with unit diagonal. */
lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_zgetrf( &N, &N, h_A, &lda, ipiv, &info );
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < j; ++i ) {
*h_A(i,j) = *h_A(j,i);
}
}
lapackf77_zlarnv( &ione, ISEED, &N, h_b );
blasf77_zcopy( &N, h_b, &ione, h_x, &ione );
/* =====================================================================
Performs operation using CUBLAS
=================================================================== */
magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
magma_zsetvector( N, h_x, 1, d_x, 1 );
cublas_time = magma_sync_wtime( NULL );
cublasZtrsv( opts.handle, cublas_uplo_const(opts.uplo),
cublas_trans_const(opts.transA), cublas_diag_const(opts.diag),
N,
d_A, ldda,
d_x, 1 );
cublas_time = magma_sync_wtime( NULL ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_zgetvector( N, d_x, 1, h_xcublas, 1 );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
blasf77_ztrsv( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), lapack_diag_const(opts.diag),
&N,
h_A, &lda,
h_x, &ione );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
}
/* =====================================================================
Check the result
=================================================================== */
// ||b - Ax|| / (||A||*||x||)
// error for CUBLAS
normA = lapackf77_zlange( "F", &N, &N, h_A, &lda, work );
normx = lapackf77_zlange( "F", &N, &ione, h_xcublas, &ione, work );
blasf77_ztrmv( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), lapack_diag_const(opts.diag),
&N,
h_A, &lda,
h_xcublas, &ione );
blasf77_zaxpy( &N, &c_neg_one, h_b, &ione, h_xcublas, &ione );
normr = lapackf77_zlange( "F", &N, &ione, h_xcublas, &N, work );
cublas_error = normr / (normA*normx);
if ( opts.lapack ) {
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cpu_perf, 1000.*cpu_time,
cublas_error, (cublas_error < tol ? "ok" : "failed"));
status += ! (cublas_error < tol);
}
else {
printf("%5d %7.2f (%7.2f) --- ( --- ) %8.2e %s\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cublas_error, (cublas_error < tol ? "ok" : "failed"));
status += ! (cublas_error < tol);
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_b );
TESTING_FREE_CPU( h_x );
TESTING_FREE_CPU( h_xcublas );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_x );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例5: main
//.........这里部分代码省略.........
printf("magma_zcgeqrsv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// compute the residual
magma_zgetmatrix( N, nrhs, d_X, lddx, h_X, ldb );
blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A, &lda,
h_X, &ldb,
&c_one, h_R, &ldb);
Anorm = lapackf77_zlange("f", &M, &N, h_A, &lda, work);
//=====================================================================
// Double Precision Solve
//=====================================================================
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
gpu_time = magma_wtime();
magma_zgels_gpu( MagmaNoTrans, M, N, nrhs, d_A, ldda,
d_B, lddb, h_workd, lworkgpu, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perfd = gflops / gpu_time;
//=====================================================================
// Single Precision Solve
//=====================================================================
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
/* d_SA and d_SB are allocated here, after the zcgeqrsv run, to avoid
 * doubling the GPU memory in use during zcgeqrsv */
TESTING_MALLOC_DEV( d_SA, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( d_SB, magmaFloatComplex, lddb*nrhs );
magmablas_zlag2c( M, N, d_A, ldda, d_SA, ldda, &info );
magmablas_zlag2c( N, nrhs, d_B, lddb, d_SB, lddb, &info );
gpu_time = magma_wtime();
magma_cgels_gpu( MagmaNoTrans, M, N, nrhs, d_SA, ldda,
d_SB, lddb, h_works, lhwork, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perfs = gflops / gpu_time;
TESTING_FREE_DEV( d_SA );
TESTING_FREE_DEV( d_SB );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_X, &ldb );
cpu_time = magma_wtime();
lapackf77_zgels( MagmaNoTransStr, &M, &N, &nrhs,
h_A, &lda, h_X, &ldb, h_workd, &lhwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zgels returned error %d: %s.\n",
(int) info, magma_strerror( info ));
blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A2, &lda,
h_X, &ldb,
&c_one, h_B, &ldb );
cpu_error = lapackf77_zlange("f", &M, &nrhs, h_B, &ldb, work) / (min_mn*Anorm);
gpu_error = lapackf77_zlange("f", &M, &nrhs, h_R, &ldb, work) / (min_mn*Anorm);
// error relative to LAPACK
size = M*nrhs;
blasf77_zaxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione );
error = lapackf77_zlange("f", &M, &nrhs, h_R, &ldb, work) / (min_mn*Anorm);
printf("%5d %5d %5d %7.2f %7.2f %7.2f %7.2f %4d %8.2e %8.2e %8.2e %s\n",
(int) M, (int) N, (int) nrhs,
cpu_perf, gpu_perfd, gpu_perfs, gpu_perf,
(int) qrsv_iters,
cpu_error, gpu_error, error, (error < tol ? "ok" : "failed"));
status += ! (error < tol);
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_A2 );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( h_X );
TESTING_FREE_CPU( h_R );
TESTING_FREE_CPU( h_workd );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
TESTING_FREE_DEV( d_X );
TESTING_FREE_DEV( d_T );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例6: main
//.........这里部分代码省略.........
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_cset_pointer( dA_array, dA_magma, ldda, 0, 0, ldda*N, batchCount, opts.queue );
magma_time = magma_sync_wtime( opts.queue );
info = magma_cgetrf_nopiv_batched( M, N, dA_array, ldda, dinfo_magma, batchCount, opts.queue);
magma_time = magma_sync_wtime( opts.queue ) - magma_time;
magma_perf = gflops / magma_time;
// check correctness of results through "dinfo_magma" and correctness of arguments through "info"
magma_getvector( batchCount, sizeof(magma_int_t), dinfo_magma, 1, cpu_info, 1);
for (int i=0; i < batchCount; i++)
{
if (cpu_info[i] != 0 ) {
printf("magma_cgetrf_batched matrix %d returned internal error %d\n", i, (int)cpu_info[i] );
}
}
if (info != 0) {
printf("magma_cgetrf_batched returned argument error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
for (int i=0; i < batchCount; i++) {
lapackf77_cgetrf(&M, &N, h_A + i*lda*N, &lda, ipiv + i * min_mn, &info);
assert( info == 0 );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
}
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%10d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f)",
(int) batchCount, (int) M, (int) N, cpu_perf, cpu_time*1000., magma_perf, magma_time*1000., cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
}
else {
printf("%10d %5d %5d --- ( --- ) %7.2f (%7.2f) %7.2f (%7.2f)",
(int) batchCount, (int) M, (int) N, magma_perf, magma_time*1000., cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
}
if ( opts.check ) {
// initialize ipiv to 1, 2, 3, ...
for (int i=0; i < batchCount; i++)
{
for (int k=0; k < min_mn; k++) {
ipiv[i*min_mn+k] = k+1;
}
}
magma_cgetmatrix( M, N*batchCount, dA_magma, ldda, h_A, lda );
error = 0;
for (int i=0; i < batchCount; i++)
{
float err;
err = get_LU_error( M, N, h_R + i * lda*N, lda, h_A + i * lda*N, ipiv + i * min_mn);
if ( isnan(err) || isinf(err) ) {
error = err;
break;
}
error = max( err, error );
}
bool okay = (error < tol);
status += ! okay;
printf(" %8.2e %s\n", error, (okay ? "ok" : "failed") );
}
else {
printf(" --- \n");
}
TESTING_FREE_CPU( cpu_info );
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( dA_magma );
TESTING_FREE_DEV( dinfo_magma );
TESTING_FREE_DEV( dipiv_magma );
TESTING_FREE_DEV( dipiv_array );
TESTING_FREE_DEV( dA_array );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
return status;
}
示例7: main
//.........这里部分代码省略.........
// C is full, m x n
size = ldc*n;
lapackf77_slarnv( &ione, ISEED, &size, C );
lapackf77_slacpy( "Full", &m, &n, C, &ldc, R, &ldc );
size = lda*nn;
lapackf77_slarnv( &ione, ISEED, &size, A );
// compute BRD factorization to get Householder vectors in A, tauq, taup
//lapackf77_sgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info );
magma_sgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info );
if (info != 0)
printf("magma_sgebrd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( vect[ivect] == MagmaQ ) {
tau = tauq;
} else {
tau = taup;
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_sormbr( lapack_vect_const( vect[ivect] ),
lapack_side_const( side[iside] ),
lapack_trans_const( trans[itran] ),
&m, &n, &k,
A, &lda, tau, C, &ldc, work, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_sormbr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for workspace size
lwork = -1;
magma_sormbr( vect[ivect], side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, work, lwork, &info );
if (info != 0)
printf("magma_sormbr (lwork query) returned error %d: %s.\n",
(int) info, magma_strerror( info ));
lwork = (magma_int_t) MAGMA_S_REAL( work[0] );
if ( lwork < 0 || lwork > lwork_max ) {
printf("optimal lwork %d > lwork_max %d\n", (int) lwork, (int) lwork_max );
lwork = lwork_max;
}
gpu_time = magma_wtime();
magma_sormbr( vect[ivect], side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, work, lwork, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_sormbr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
error = lapackf77_slange( "Fro", &m, &n, C, &ldc, dwork );
size = ldc*n;
blasf77_saxpy( &size, &c_neg_one, C, &ione, R, &ione );
error = lapackf77_slange( "Fro", &m, &n, R, &ldc, dwork ) / error;
printf( "%5d %5d %5d %c %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) m, (int) n, (int) k,
lapacke_vect_const( vect[ivect] ),
lapacke_side_const( side[iside] ),
lapacke_trans_const( trans[itran] ),
cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
TESTING_FREE_CPU( C );
TESTING_FREE_CPU( R );
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( work );
TESTING_FREE_CPU( d );
TESTING_FREE_CPU( e );
TESTING_FREE_CPU( taup );
TESTING_FREE_CPU( tauq );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}}} // end ivect, iside, itran
printf( "\n" );
}
TESTING_FINALIZE();
return status;
}
示例8: main
//.........这里部分代码省略.........
// warmup
if ( opts.warmup ) {
magma_cgegqr_gpu( 1, M, N, d_A, ldda, dwork, h_work, &info );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_sync_wtime( 0 );
magma_cgegqr_gpu( opts.version, M, N, d_A, ldda, dwork, h_rwork, &info );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgegqr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
magma_cgetmatrix( M, N, d_A, ldda, h_R, M );
// Regenerate R
// blasf77_cgemm("t", "n", &N, &N, &M, &c_one, h_R, &M, h_A, &M, &c_zero, h_rwork, &N);
// magma_cprint(N, N, h_work, N);
blasf77_ctrmm("r", "u", "n", "n", &M, &N, &c_one, h_rwork, &N, h_R, &M);
blasf77_caxpy( &n2, &c_neg_one, h_A, &ione, h_R, &ione );
e5 = lapackf77_clange("i", &M, &N, h_R, &M, work) /
lapackf77_clange("i", &M, &N, h_A, &lda, work);
magma_cgetmatrix( M, N, d_A, ldda, h_R, M );
if ( opts.lapack ) {
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
/* Orthogonalize on the CPU */
lapackf77_cgeqrf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info);
lapackf77_cungqr(&M, &N, &N, h_A, &lda, tau, h_work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cungqr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
blasf77_cgemm("c", "n", &N, &N, &M, &c_one, h_R, &M, h_R, &M, &c_zero, h_work, &N);
for(int ii = 0; ii < N*N; ii += N+1 ) {
h_work[ii] = MAGMA_C_SUB(h_work[ii], c_one);
}
e1 = lapackf77_clange("f", &N, &N, h_work, &N, work) / N;
e3 = lapackf77_clange("i", &N, &N, h_work, &N, work) / N;
blasf77_cgemm("c", "n", &N, &N, &M, &c_one, h_A, &M, h_A, &M, &c_zero, h_work, &N);
for(int ii = 0; ii < N*N; ii += N+1 ) {
h_work[ii] = MAGMA_C_SUB(h_work[ii], c_one);
}
e2 = lapackf77_clange("f", &N, &N, h_work, &N, work) / N;
e4 = lapackf77_clange("i", &N, &N, h_work, &N, work) / N;
if (opts.version != 4)
e = e1;
else
e = e1 / (10.*max(M,N));
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e / %8.2e %8.2e / %8.2e %8.2e %s\n",
(int) M, (int) N, cpu_perf, 1000.*cpu_time, gpu_perf, 1000.*gpu_time,
e1, e2, e3, e4, e5,
(e < tol ? "ok" : "failed"));
status += ! (e < tol);
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f) --- \n",
(int) M, (int) N, gpu_perf, 1000.*gpu_time );
}
TESTING_FREE_PIN( tau );
TESTING_FREE_PIN( h_work );
TESTING_FREE_PIN( h_rwork );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_CPU( work );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( dtau );
TESTING_FREE_DEV( dwork );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例9: main
//.........这里部分代码省略.........
| B A Z - Z D | / ( |A||Z| N ) (itype = 3)
(2) | S(with V) - S(w/o V) | / | S |
=================================================================== */
#if defined(PRECISION_d) || defined(PRECISION_s)
double *rwork = h_work + N*N;
#endif
double temp1, temp2;
result[0] = 1.;
result[0] /= lapackf77_dlansy("1", lapack_uplo_const(opts.uplo), &N, h_A, &N, rwork);
result[0] /= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork);
if (opts.itype == 1) {
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
for(int i=0; i < m1; ++i)
blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
result[0] *= lapackf77_dlange("1", &N, &m1, h_work, &N, rwork)/N;
}
else if (opts.itype == 2) {
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
for(int i=0; i < m1; ++i)
blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
result[0] *= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork)/N;
}
else if (opts.itype == 3) {
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
for(int i=0; i < m1; ++i)
blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
result[0] *= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork)/N;
}
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
magma_dsygvdx( opts.itype, MagmaNoVec, MagmaRangeI, opts.uplo,
N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2,
h_work, lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
rwork, lrwork,
#endif
iwork, liwork,
&info );
if (info != 0)
printf("magma_dsygvdx returned error %d: %s.\n",
(int) info, magma_strerror( info ));
temp1 = temp2 = 0;
for(int j=0; j < m2; j++) {
temp1 = max(temp1, absv(w1[j]));
temp1 = max(temp1, absv(w2[j]));
temp2 = max(temp2, absv(w1[j]-w2[j]));
}
result[1] = temp2 / (((double)m2)*temp1);
}
/* =====================================================================
Print execution time
=================================================================== */
printf("%5d %5d %7.2f\n",
(int) N, (int) m1, gpu_time);
if ( opts.check ) {
printf("Testing the eigenvalues and eigenvectors for correctness:\n");
if (opts.itype == 1) {
printf("(1) | A Z - B Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed"));
}
else if (opts.itype == 2) {
printf("(1) | A B Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed"));
}
else if (opts.itype == 3) {
printf("(1) | B A Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed"));
}
printf( "(2) | D(w/ Z) - D(w/o Z) | / |D| = %8.2e %s\n\n", result[1], (result[1] < tolulp ? "ok" : "failed"));
status += ! (result[0] < tol && result[1] < tolulp);
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( w1 );
TESTING_FREE_CPU( w2 );
TESTING_FREE_CPU( iwork );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( h_S );
TESTING_FREE_PIN( h_work );
#if defined(PRECISION_z) || defined(PRECISION_c)
TESTING_FREE_PIN( rwork );
#endif
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例10: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing csymmetrize
Code is very similar to testing_ctranspose.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_R;
magmaFloatComplex_ptr d_A;
magma_int_t N, size, lda, ldda;
magma_int_t ione = 1;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
printf("uplo = %s\n", lapack_uplo_const(opts.uplo) );
printf(" N CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("=====================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
ldda = ((N+31)/32)*32;
size = lda*N;
// load strictly lower triangle, save strictly upper triangle
gbytes = sizeof(magmaFloatComplex) * 1.*N*(N-1) / 1e9;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, size );
TESTING_MALLOC_CPU( h_R, magmaFloatComplex, size );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < N; ++i ) {
h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j );
}
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_csetmatrix( N, N, h_A, lda, d_A, ldda );
gpu_time = magma_sync_wtime( 0 );
//magmablas_csymmetrize( opts.uplo, N-2, d_A+1+ldda, ldda ); // inset by 1 row & col
magmablas_csymmetrize( opts.uplo, N, d_A, ldda );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using naive in-place algorithm
(LAPACK doesn't implement symmetrize)
=================================================================== */
cpu_time = magma_wtime();
//for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col
// for( int i = 1; i < j; ++i ) {
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < j; ++i ) {
if ( opts.uplo == MagmaLower ) {
h_A[i + j*lda] = MAGMA_C_CNJG( h_A[j + i*lda] );
}
else {
h_A[j + i*lda] = MAGMA_C_CNJG( h_A[i + j*lda] );
}
}
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_cgetmatrix( N, N, d_A, ldda, h_R, lda );
blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_clange("f", &N, &N, h_R, &lda, work);
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %s\n",
(int) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed") );
status += ! (error == 0.);
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( d_A );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
//.........这里部分代码省略.........
示例11: main
//.........这里部分代码省略.........
/* Excerpt of the stranspose benchmark loop body (the enclosing loops and the
   declarations are not shown in this excerpt). It times a naive CPU
   transpose, the MAGMA out-of-place transpose and, for square matrices, the
   MAGMA in-place transpose, then compares each GPU result with the CPU one.
   All times are printed in milliseconds. */
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_A[i + j*lda] = MAGMA_S_MAKE( i + j/10000., j );
}
}
// h_B is the N x M destination; it is filled here and overwritten by the CPU transpose below
for( int j = 0; j < M; ++j ) {
for( int i = 0; i < N; ++i ) {
h_B[i + j*ldb] = MAGMA_S_MAKE( i + j/10000., j );
}
}
magma_ssetmatrix( N, M, h_B, ldb, d_B, lddb );
/* =====================================================================
Performs operation using naive out-of-place algorithm
(LAPACK doesn't implement transpose)
=================================================================== */
cpu_time = magma_wtime();
//for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col
// for( int i = 1; i < M-1; ++i ) { // inset by 1 row & col
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_B[j + i*ldb] = h_A[i + j*lda];
}
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* ====================================================================
Performs operation using MAGMA, out-of-place
=================================================================== */
// upload the source matrix and the reference transpose before timing
magma_ssetmatrix( M, N, h_A, lda, d_A, ldda );
magma_ssetmatrix( N, M, h_B, ldb, d_B, lddb );
gpu_time = magma_sync_wtime( 0 );
//magmablas_stranspose( M-2, N-2, d_A+1+ldda, ldda, d_B+1+lddb, lddb ); // inset by 1 row & col
magmablas_stranspose( M, N, d_A, ldda, d_B, lddb );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* ====================================================================
Performs operation using MAGMA, in-place
=================================================================== */
// the in-place variant is only exercised for square matrices
if ( M == N ) {
magma_ssetmatrix( M, N, h_A, lda, d_A, ldda );
gpu_time2 = magma_sync_wtime( 0 );
//magmablas_stranspose_inplace( N-2, d_A+1+ldda, ldda ); // inset by 1 row & col
magmablas_stranspose_inplace( N, d_A, ldda );
gpu_time2 = magma_sync_wtime( 0 ) - gpu_time2;
gpu_perf2 = gbytes / gpu_time2;
}
/* =====================================================================
Check the result
=================================================================== */
// check out-of-place transpose (d_B)
// h_R := d_B - h_B; the test passes only if the difference is exactly zero
size = ldb*M;
magma_sgetmatrix( N, M, d_B, lddb, h_R, ldb );
blasf77_saxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione );
error = lapackf77_slange("f", &N, &M, h_R, &ldb, work );
if ( M == N ) {
// also check in-place transpose (d_A) against the same CPU reference h_B
magma_sgetmatrix( N, M, d_A, ldda, h_R, ldb );
blasf77_saxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione );
error2 = lapackf77_slange("f", &N, &M, h_R, &ldb, work );
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %6s %7.2f (%7.2f) %s\n",
(int) M, (int) N,
cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed"),
// in-place time is converted to ms like the CPU and out-of-place times
gpu_perf2, gpu_time2*1000.,
(error2 == 0. ? "ok" : "failed") );
status += ! (error == 0. && error2 == 0.);
}
else {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %6s --- ( --- )\n",
(int) M, (int) N,
cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed") );
status += ! (error == 0.);
}
// release the per-iteration host and device buffers
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
fflush( stdout );
}
// blank line between sizes when several iterations were printed per size
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
// status counts the failed checks accumulated above
return status;
}
示例12: main
//.........这里部分代码省略.........
/* Excerpt of the cunmqr_gpu test loop body. The excerpt starts inside the
   error report for the preceding magma_cgeqrf_gpu call (presumably guarded by
   an `if (info != 0)` that begins above this excerpt). It then runs the
   LAPACK reference and the MAGMA routine, and compares the two results. */
printf("magma_cgeqrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
// reference result: C is overwritten by cunmqr using the full allocated workspace
cpu_time = magma_wtime();
lapackf77_cunmqr( lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ),
&m, &n, &k,
A, &lda, tau, C, &ldc, hwork, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_cunmqr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for workspace size
// (lwork = -1 requests the query; the size is read back from hwork[0] below)
lwork = -1;
magma_cunmqr_gpu( side[iside], trans[itran],
m, n, k,
dA, lda, tau, dC, ldc, hwork, lwork, dT, nb, &info );
if (info != 0) {
printf("magma_cunmqr_gpu (lwork query) returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
lwork = (magma_int_t) MAGMA_C_REAL( hwork[0] );
// fall back to the allocated size if the reported size is negative or too large
// NOTE(review): the warning text only describes the "too large" case
if ( lwork < 0 || lwork > lwork_max ) {
printf("Warning: optimal lwork %d > allocated lwork_max %d\n", (int) lwork, (int) lwork_max );
lwork = lwork_max;
}
// cunmqr2 takes a copy of dA in CPU memory
if ( opts.version == 2 ) {
magma_cgetmatrix( mm, k, dA, lda, A, lda );
}
magmablasSetKernelStream( opts.queue );
gpu_time = magma_sync_wtime( opts.queue ); // sync needed for L,N and R,T cases
// opts.version selects which MAGMA interface is timed
if ( opts.version == 1 ) {
magma_cunmqr_gpu( side[iside], trans[itran],
m, n, k,
dA, lda, tau, dC, ldc, hwork, lwork, dT, nb, &info );
}
else if ( opts.version == 2 ) {
magma_cunmqr2_gpu( side[iside], trans[itran],
m, n, k,
dA, lda, tau, dC, ldc, A, lda, &info );
}
gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
gpu_perf = gflops / gpu_time;
// note: the same message is printed for both versions
if (info != 0) {
printf("magma_cunmqr_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
// bring the MAGMA result back to the host for comparison
magma_cgetmatrix( m, n, dC, ldc, R, ldc );
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
// R := R - C, then error = ||R||_F / ( sqrt(m*n) * ||C||_F )
// NOTE(review): m*n is formed in integer arithmetic before the sqrt; confirm
// it cannot overflow magma_int_t for the largest tested sizes
size = ldc*n;
blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione );
Cnorm = lapackf77_clange( "Fro", &m, &n, C, &ldc, work );
error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / (magma_ssqrt(m*n) * Cnorm);
// times are printed as measured (no unit scaling is applied here)
printf( "%5d %5d %5d %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) m, (int) n, (int) k,
lapacke_side_const( side[iside] ),
lapacke_trans_const( trans[itran] ),
cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
// release the per-iteration host and device buffers
TESTING_FREE_CPU( C );
TESTING_FREE_CPU( R );
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( hwork );
TESTING_FREE_CPU( tau );
TESTING_FREE_DEV( dC );
TESTING_FREE_DEV( dA );
TESTING_FREE_DEV( dT );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}} // end iside, itran
printf( "\n" );
}
opts.cleanup();
TESTING_FINALIZE();
// status counts the checks whose error was not below tol
return status;
}
示例13: main
//.........这里部分代码省略.........
/* Excerpt of the zsymv test driver: for every requested size and iteration
   it runs magmablas_zsymv_work on the GPU and zsymv on the CPU, then
   compares the two result vectors. Declarations are above this excerpt. */
// acceptance threshold: user tolerance scaled by the value of dlamch("E")
double tol = opts.tolerance * lapackf77_dlamch("E");
printf("uplo = %s\n", lapack_uplo_const(opts.uplo) );
printf(" N MAGMA Gflop/s (ms) CPU Gflop/s (ms) MAGMA error\n");
printf("=========================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
// device leading dimension is N rounded up to a multiple of 32
ldda = ((N + 31)/32)*32;
sizeA = N*lda;
sizeX = N*incx;
sizeY = N*incy;
gflops = FLOPS_ZSYMV( N ) / 1e9;
TESTING_MALLOC_CPU( A, magmaDoubleComplex, sizeA );
TESTING_MALLOC_CPU( X, magmaDoubleComplex, sizeX );
TESTING_MALLOC_CPU( Y, magmaDoubleComplex, sizeY );
TESTING_MALLOC_CPU( Ymagma, magmaDoubleComplex, sizeY );
TESTING_MALLOC_DEV( dA, magmaDoubleComplex, ldda*N );
TESTING_MALLOC_DEV( dX, magmaDoubleComplex, sizeX );
TESTING_MALLOC_DEV( dY, magmaDoubleComplex, sizeY );
// workspace: ldda entries for each of the ceil(N/nb) blocks
blocks = (N + nb - 1) / nb;
ldwork = ldda*blocks;
TESTING_MALLOC_DEV( dwork, magmaDoubleComplex, ldwork );
// fill the workspace and the padded device matrix with NaN
// (presumably so that any use of uninitialized entries shows up in the result; confirm)
magmablas_zlaset( MagmaFull, ldwork, 1, MAGMA_Z_NAN, MAGMA_Z_NAN, dwork, ldwork );
magmablas_zlaset( MagmaFull, ldda, N, MAGMA_Z_NAN, MAGMA_Z_NAN, dA, ldda );
/* Initialize the matrix */
// NOTE(review): the matrix is made Hermitian although the routine under test
// is the symmetric product; confirm this is intended
lapackf77_zlarnv( &ione, ISEED, &sizeA, A );
magma_zmake_hermitian( N, A, lda );
lapackf77_zlarnv( &ione, ISEED, &sizeX, X );
lapackf77_zlarnv( &ione, ISEED, &sizeY, Y );
/* Note: CUBLAS does not implement zsymv */
/* =====================================================================
Performs operation using MAGMABLAS
=================================================================== */
magma_zsetmatrix( N, N, A, lda, dA, ldda );
magma_zsetvector( N, X, incx, dX, incx );
magma_zsetvector( N, Y, incy, dY, incy );
//magma_zprint_gpu( ldda, blocks, dwork, ldda );
magma_time = magma_sync_wtime( 0 );
magmablas_zsymv_work( opts.uplo, N, alpha, dA, ldda, dX, incx, beta, dY, incy, dwork, ldwork );
// TODO provide option to test non-work interface
//magmablas_zsymv( opts.uplo, N, alpha, dA, ldda, dX, incx, beta, dY, incy );
magma_time = magma_sync_wtime( 0 ) - magma_time;
magma_perf = gflops / magma_time;
// copy the GPU result into Ymagma; the host Y stays untouched for the CPU run
magma_zgetvector( N, dY, incy, Ymagma, incy );
//magma_zprint_gpu( ldda, blocks, dwork, ldda );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
// the CPU reference is called through the lapackf77_ wrapper and updates Y in place
cpu_time = magma_wtime();
lapackf77_zsymv( lapack_uplo_const(opts.uplo), &N, &alpha, A, &lda, X, &incx, &beta, Y, &incy );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
// Ymagma := Ymagma - Y, then error = max-abs entry / N
// NOTE(review): zaxpy steps through the vectors with stride incy, but zlange
// reads Ymagma as a contiguous N x 1 column; these agree only if incy == 1
// (incy is set outside this excerpt; confirm)
blasf77_zaxpy( &N, &c_neg_one, Y, &incy, Ymagma, &incy );
magma_error = lapackf77_zlange( "M", &N, &ione, Ymagma, &N, work ) / N;
// times are converted from seconds to milliseconds for printing
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) N,
magma_perf, 1000.*magma_time,
cpu_perf, 1000.*cpu_time,
magma_error, (magma_error < tol ? "ok" : "failed"));
status += ! (magma_error < tol);
// release the per-iteration host and device buffers
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( X );
TESTING_FREE_CPU( Y );
TESTING_FREE_CPU( Ymagma );
TESTING_FREE_DEV( dA );
TESTING_FREE_DEV( dX );
TESTING_FREE_DEV( dY );
TESTING_FREE_DEV( dwork );
fflush( stdout );
}
// blank line between sizes when several iterations were printed per size
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
// status counts the iterations whose error was not below tol
return status;
}
示例14: main
//.........这里部分代码省略.........
/* Excerpt of the cgetrf test driver: creates two queues, then for every
   requested size and iteration optionally runs the LAPACK factorization,
   runs magma_cgetrf, and optionally checks the factorization.
   Declarations and option parsing are above this excerpt. */
// Create two queues on device opts.device
// (the program exits immediately if either creation fails)
err = magma_queue_create( devices[opts.device], &queue[0] );
if ( err != 0 ) {
fprintf( stderr, "magma_queue_create failed: %d\n", err );
exit(-1);
}
err = magma_queue_create( devices[opts.device], &queue[1] );
if ( err != 0 ) {
fprintf( stderr, "magma_queue_create failed: %d\n", err );
exit(-1);
}
printf("ngpu %d\n", (int) opts.ngpu );
// the last column header names the check selected by opts.check
if ( opts.check == 2 ) {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
}
else {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)\n");
}
printf("=========================================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[i];
N = opts.nsize[i];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
// M rounded up to a multiple of 32 (not used in the lines shown here)
ldda = ((M+31)/32)*32;
gflops = FLOPS_CGETRF( M, N ) / 1e9;
TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn );
TESTING_MALLOC_PIN( h_A, magmaFloatComplex, n2 );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
// the CPU reference is only run (and cpu_time/cpu_perf only set) when requested
if ( opts.lapack ) {
init_matrix( M, N, h_A, lda );
cpu_time = magma_wtime();
lapackf77_cgetrf(&M, &N, h_A, &lda, ipiv, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// h_A is shared with the LAPACK run above, so it is re-initialized first
init_matrix( M, N, h_A, lda );
gpu_time = magma_wtime();
magma_cgetrf( M, N, h_A, lda, ipiv, &info, queue);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
// performance columns first; the CPU columns are dashes when LAPACK was skipped
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) M, (int) N, gpu_perf, gpu_time );
}
// opts.check == 2 selects get_residual, any other non-zero value get_LU_error;
// status is OR-ed, so it ends up 0 or 1 rather than a failure count
if ( opts.check == 2 ) {
error = get_residual( M, N, h_A, lda, ipiv );
printf(" %8.2e%s\n", error, (error < tol ? "" : " failed"));
status |= ! (error < tol);
}
else if ( opts.check ) {
error = get_LU_error( M, N, h_A, lda, ipiv );
printf(" %8.2e%s\n", error, (error < tol ? "" : " failed"));
status |= ! (error < tol);
}
else {
printf(" --- \n");
}
// release the per-iteration buffers with the macro matching each allocation
TESTING_FREE_CPU( ipiv );
TESTING_FREE_PIN( h_A );
}
// blank line between sizes when several iterations were printed per size
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
magma_queue_destroy( queue[0] );
magma_queue_destroy( queue[1] );
magma_finalize();
return status;
}
示例15: main
//.........这里部分代码省略.........
/* Excerpt of the dgetri_gpu test loop body: LU-factorizes A on the GPU,
   times the GPU inversion, optionally times the LAPACK inversion, and
   optionally checks the GPU inverse. Allocation and matrix setup are above
   this excerpt. */
// factor A on the GPU and keep a host copy of the LU factors in h_Ainv,
// which the LAPACK dgetri call below consumes together with ipiv
magma_dsetmatrix( N, N, h_A, lda, d_A, ldda );
magma_dgetrf_gpu( N, N, d_A, ldda, ipiv, &info );
magma_dgetmatrix( N, N, d_A, ldda, h_Ainv, lda );
if (info != 0) {
printf("magma_dgetrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
// check for exact singularity
//h_Ainv[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
//magma_dsetmatrix( N, N, h_Ainv, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// only the inversion is timed; the factorization above is outside the timer
gpu_time = magma_wtime();
magma_dgetri_gpu( N, d_A, ldda, ipiv, dwork, ldwork, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0) {
printf("magma_dgetri_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
// the CPU reference is only run when requested; otherwise dashes are printed
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_dgetri( &N, h_Ainv, &lda, ipiv, work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_dgetri returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
printf( "%5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf( "%5d --- ( --- ) %7.2f (%7.2f)",
(int) N, gpu_perf, gpu_time );
}
/* =====================================================================
Check the result
=================================================================== */
if ( opts.check ) {
// fetch the GPU inverse into h_Ainv (this overwrites any LAPACK result stored there)
magma_dgetmatrix( N, N, d_A, ldda, h_Ainv, lda );
// compute 1-norm condition number estimate, following LAPACK's zget03
double normA, normAinv, rcond;
normA = lapackf77_dlange( "1", &N, &N, h_A, &lda, rwork );
normAinv = lapackf77_dlange( "1", &N, &N, h_Ainv, &lda, rwork );
// a non-positive norm is treated as failure: the error is forced to 1/eps
if ( normA <= 0 || normAinv <= 0 ) {
rcond = 0;
error = 1 / (tol/opts.tolerance); // == 1/eps
}
else {
rcond = (1 / normA) / normAinv;
// R = I
// R -= A*A^{-1}
// err = ||I - A*A^{-1}|| / ( N ||A||*||A^{-1}|| ) = ||R|| * rcond / N, using 1-norm
lapackf77_dlaset( "full", &N, &N, &c_zero, &c_one, h_R, &lda );
blasf77_dgemm( "no", "no", &N, &N, &N,
&c_neg_one, h_A, &lda,
h_Ainv, &lda,
&c_one, h_R, &lda );
error = lapackf77_dlange( "1", &N, &N, h_R, &lda, rwork );
error = error * rcond / N;
}
bool okay = (error < tol);
status += ! okay;
printf( " %8.2e %s\n",
error, (okay ? "ok" : "failed"));
}
else {
// no check requested: just terminate the performance line
printf( "\n" );
}
// release the per-iteration host and device buffers
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( work );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_Ainv );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( dwork );
fflush( stdout );
}
// blank line between sizes when several iterations were printed per size
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
// status counts the checks whose error was not below tol
return status;
}