本文整理汇总了C++中TESTING_FINALIZE函数的典型用法代码示例。如果您正苦于以下问题：C++ TESTING_FINALIZE函数的具体用法？C++ TESTING_FINALIZE怎么用？C++ TESTING_FINALIZE使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了TESTING_FINALIZE函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
//.........这里部分代码省略.........
sizeA = lda*N;
TESTING_MALLOC( ipiv, magma_int_t, N );
TESTING_MALLOC( h_A, float, lda*N );
TESTING_MALLOC( h_b, float, N );
TESTING_MALLOC( h_x, float, N );
TESTING_MALLOC( h_xcublas, float, N );
TESTING_DEVALLOC( d_A, float, ldda*N );
TESTING_DEVALLOC( d_x, float, N );
/* Initialize the matrices */
/* Factor A into LU to get well-conditioned triangular matrix.
* Copy L to U, since L seems okay when used with non-unit diagonal
* (i.e., from U), while U fails when used with unit diagonal. */
lapackf77_slarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_sgetrf( &N, &N, h_A, &lda, ipiv, &info );
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < j; ++i ) {
*h_A(i,j) = *h_A(j,i);
}
}
lapackf77_slarnv( &ione, ISEED, &N, h_b );
blasf77_scopy( &N, h_b, &ione, h_x, &ione );
/* =====================================================================
Performs operation using CUDA-BLAS
=================================================================== */
magma_ssetmatrix( N, N, h_A, lda, d_A, ldda );
magma_ssetvector( N, h_x, 1, d_x, 1 );
cublas_time = magma_sync_wtime( NULL );
cublasStrsv( opts.uplo, opts.transA, opts.diag,
N,
d_A, ldda,
d_x, 1 );
cublas_time = magma_sync_wtime( NULL ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_sgetvector( N, d_x, 1, h_xcublas, 1 );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
blasf77_strsv( &opts.uplo, &opts.transA, &opts.diag,
&N,
h_A, &lda,
h_x, &ione );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
}
/* =====================================================================
Check the result
=================================================================== */
// ||b - Ax|| / (||A||*||x||)
// error for CUBLAS
normA = lapackf77_slange( "F", &N, &N, h_A, &lda, work );
normx = lapackf77_slange( "F", &N, &ione, h_xcublas, &ione, work );
blasf77_strmv( &opts.uplo, &opts.transA, &opts.diag,
&N,
h_A, &lda,
h_xcublas, &ione );
blasf77_saxpy( &N, &c_neg_one, h_b, &ione, h_xcublas, &ione );
normr = lapackf77_slange( "F", &N, &ione, h_xcublas, &N, work );
cublas_error = normr / (normA*normx);
if ( opts.lapack ) {
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cpu_perf, 1000.*cpu_time,
cublas_error );
}
else {
printf("%5d %7.2f (%7.2f) --- ( --- ) %8.2e\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cublas_error );
}
TESTING_FREE( h_A );
TESTING_FREE( h_x );
TESTING_FREE( h_xcublas );
TESTING_DEVFREE( d_A );
TESTING_DEVFREE( d_x );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return 0;
}
示例2: main
//.........这里部分代码省略.........
ngpu = min( opts.ngpu, int((N+nb-1)/nb) );
if ( ngpu < opts.ngpu ) {
printf( " * too many GPUs for the matrix size, using %d GPUs\n", (int) ngpu );
}
// Allocate host memory for the matrix
TESTING_MALLOC_CPU( h_A, double, n2 );
TESTING_MALLOC_PIN( h_R, double, n2 );
// Allocate device memory
// matrix is distributed by block-rows or block-columns
// this is maximum size that any GPU stores;
// size is rounded up to full blocks in both rows and columns
max_size = nb*(1+N/(nb*ngpu)) * nb*((N+nb-1)/nb);
for( int dev=0; dev < ngpu; dev++ ) {
magma_setdevice( dev );
TESTING_MALLOC_DEV( d_lA[dev], double, max_size );
}
/* Initialize the matrix */
lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
magma_dmake_hpd( N, h_A, lda );
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_dpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_dpotrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
if ( opts.uplo == MagmaUpper ) {
ldda = ((N+nb-1)/nb)*nb;
magma_dsetmatrix_1D_col_bcyclic( N, N, h_R, lda, d_lA, ldda, ngpu, nb );
} else {
ldda = (1+N/(nb*ngpu))*nb;
magma_dsetmatrix_1D_row_bcyclic( N, N, h_R, lda, d_lA, ldda, ngpu, nb );
}
gpu_time = magma_wtime();
magma_dpotrf_mgpu( ngpu, opts.uplo, N, d_lA, ldda, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_dpotrf_mgpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.uplo == MagmaUpper ) {
magma_dgetmatrix_1D_col_bcyclic( N, N, d_lA, ldda, h_R, lda, ngpu, nb );
} else {
magma_dgetmatrix_1D_row_bcyclic( N, N, d_lA, ldda, h_R, lda, ngpu, nb );
}
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
for( int dev=0; dev < ngpu; dev++ ){
magma_setdevice( dev );
magma_device_sync();
}
if ( opts.lapack ) {
error = lapackf77_dlange("f", &N, &N, h_A, &lda, work );
blasf77_daxpy( &n2, &c_neg_one, h_A, &ione, h_R, &ione );
error = lapackf77_dlange("f", &N, &N, h_R, &lda, work ) / error;
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
}
else {
printf("%5d --- ( --- ) %7.2f (%7.2f) ---\n",
(int) N, gpu_perf, gpu_time );
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_PIN( h_R );
for( int dev=0; dev < ngpu; dev++ ){
magma_setdevice( dev );
TESTING_FREE_DEV( d_lA[dev] );
}
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例3: main
//.........这里部分代码省略.........
// C is full, m x n
size = ldc*n;
lapackf77_dlarnv( &ione, ISEED, &size, C );
lapackf77_dlacpy( "Full", &m, &n, C, &ldc, R, &ldc );
size = lda*nn;
lapackf77_dlarnv( &ione, ISEED, &size, A );
// compute BRD factorization to get Householder vectors in A, tauq, taup
//lapackf77_dgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info );
magma_dgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info );
if (info != 0)
printf("magma_dgebrd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( vect[ivect] == MagmaQ ) {
tau = tauq;
} else {
tau = taup;
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_dormbr( lapack_vect_const( vect[ivect] ),
lapack_side_const( side[iside] ),
lapack_trans_const( trans[itran] ),
&m, &n, &k,
A, &lda, tau, C, &ldc, work, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_dormbr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for workspace size
lwork = -1;
magma_dormbr( vect[ivect], side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, work, lwork, &info );
if (info != 0)
printf("magma_dormbr (lwork query) returned error %d: %s.\n",
(int) info, magma_strerror( info ));
lwork = (magma_int_t) MAGMA_D_REAL( work[0] );
if ( lwork < 0 || lwork > lwork_max ) {
printf("optimal lwork %d > lwork_max %d\n", (int) lwork, (int) lwork_max );
lwork = lwork_max;
}
gpu_time = magma_wtime();
magma_dormbr( vect[ivect], side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, work, lwork, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_dormbr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
error = lapackf77_dlange( "Fro", &m, &n, C, &ldc, dwork );
size = ldc*n;
blasf77_daxpy( &size, &c_neg_one, C, &ione, R, &ione );
error = lapackf77_dlange( "Fro", &m, &n, R, &ldc, dwork ) / error;
printf( "%5d %5d %5d %c %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) m, (int) n, (int) k,
lapacke_vect_const( vect[ivect] ),
lapacke_side_const( side[iside] ),
lapacke_trans_const( trans[itran] ),
cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
TESTING_FREE_CPU( C );
TESTING_FREE_CPU( R );
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( work );
TESTING_FREE_CPU( d );
TESTING_FREE_CPU( e );
TESTING_FREE_CPU( taup );
TESTING_FREE_CPU( tauq );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}}} // end ivect, iside, itran
printf( "\n" );
}
TESTING_FINALIZE();
return status;
}
示例4: main
//.........这里部分代码省略.........
lapacke_trans_const( trans[itran] ) );
continue;
}
// need at least 2*nb*nb for geqlf
lwork_max = max( max( m*nb, n*nb ), 2*nb*nb );
TESTING_MALLOC_CPU( C, magmaFloatComplex, ldc*n );
TESTING_MALLOC_CPU( R, magmaFloatComplex, ldc*n );
TESTING_MALLOC_CPU( A, magmaFloatComplex, lda*k );
TESTING_MALLOC_CPU( W, magmaFloatComplex, lwork_max );
TESTING_MALLOC_CPU( tau, magmaFloatComplex, k );
// C is full, m x n
size = ldc*n;
lapackf77_clarnv( &ione, ISEED, &size, C );
lapackf77_clacpy( "Full", &m, &n, C, &ldc, R, &ldc );
size = lda*k;
lapackf77_clarnv( &ione, ISEED, &size, A );
// compute QL factorization to get Householder vectors in A, tau
magma_cgeqlf( mm, k, A, lda, tau, W, lwork_max, &info );
if (info != 0)
printf("magma_cgeqlf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_cunmql( lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ),
&m, &n, &k,
A, &lda, tau, C, &ldc, W, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cunmql returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for workspace size
lwork = -1;
magma_cunmql( side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, W, lwork, &info );
if (info != 0)
printf("magma_cunmql (lwork query) returned error %d: %s.\n",
(int) info, magma_strerror( info ));
lwork = (magma_int_t) MAGMA_C_REAL( W[0] );
if ( lwork < 0 || lwork > lwork_max ) {
printf("optimal lwork %d > lwork_max %d\n", (int) lwork, (int) lwork_max );
lwork = lwork_max;
}
gpu_time = magma_wtime();
magma_cunmql( side[iside], trans[itran],
m, n, k,
A, lda, tau, R, ldc, W, lwork, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cunmql returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
error = lapackf77_clange( "Fro", &m, &n, C, &ldc, work );
size = ldc*n;
blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione );
error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / error;
printf( "%5d %5d %5d %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) m, (int) n, (int) k,
lapacke_side_const( side[iside] ),
lapacke_trans_const( trans[itran] ),
cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
TESTING_FREE_CPU( C );
TESTING_FREE_CPU( R );
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( W );
TESTING_FREE_CPU( tau );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}} // end iside, itran
printf( "\n" );
}
TESTING_FINALIZE();
return status;
}
示例5: main
//.........这里部分代码省略.........
magma_opts opts;
parse_opts( argc, argv, &opts );
double tol = opts.tolerance * lapackf77_dlamch("E");
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
printf("=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
ldb = N;
lda = N;
n2 = lda*N;
sizeB = ldb*opts.nrhs;
gflops = ( FLOPS_DPOTRF( N ) + FLOPS_DPOTRS( N, opts.nrhs ) ) / 1e9;
TESTING_MALLOC_CPU( ipiv, magma_int_t, N );
TESTING_MALLOC_PIN( h_A, double, n2 );
TESTING_MALLOC_PIN( h_B, double, sizeB );
TESTING_MALLOC_PIN( h_X, double, sizeB );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
lwork = -1;
lapackf77_dsysv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, &temp, &lwork, &info);
lwork = (int)MAGMA_D_REAL(temp);
TESTING_MALLOC_CPU( work, double, lwork );
init_matrix( N, N, h_A, lda );
lapackf77_dlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
cpu_time = magma_wtime();
lapackf77_dsysv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs,
h_A, &lda, ipiv, h_X, &ldb, work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_dsysv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
error_lapack = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
TESTING_FREE_CPU( work );
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
init_matrix( N, N, h_A, lda );
lapackf77_dlarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_dlacpy( MagmaUpperLowerStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb );
magma_setdevice(0);
gpu_time = magma_wtime();
magma_dsysv( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_dsysv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) N, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) N, (int) N, gpu_perf, gpu_time );
}
if ( opts.check == 0 ) {
printf(" --- \n");
} else {
error = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb );
printf(" %8.2e %s", error, (error < tol ? "ok" : "failed"));
if (opts.lapack)
printf(" (lapack rel.res. = %8.2e)", error_lapack);
printf("\n");
status += ! (error < tol);
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_PIN( h_X );
TESTING_FREE_PIN( h_B );
TESTING_FREE_PIN( h_A );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例6: main
/* ////////////////////////////////////////////////////////////////////////////
   -- testing csr matrix add

   Usage: <program> MATRIX_A MATRIX_B
   Each MATRIX is either a file name handed to magma_s_csr_mtx, or the two
   arguments "LAPLACE2D <size>", which build the matrix with magma_sm_5stencil.

   The test calls magma_scuspaxpy twice on the device: first with scalars
   (+1, +1) on A and B to produce C, then with (-1, +1) on A and C to produce a
   new B. Presumably that is C = A + B followed by B2 = C - A -- confirm against
   the magma_scuspaxpy documentation. The result is copied back to the host and
   compared against the original B with magma_smdiff.

   Returns -1 when a matrix argument is missing; otherwise returns whatever
   info holds at exit (CHECK is defined elsewhere; presumably it stores a
   failing return code in info and jumps to the cleanup label).
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_queue_t queue=NULL;
    magma_queue_create( &queue );

    real_Double_t res;
    magma_s_matrix A={Magma_CSR}, B={Magma_CSR}, B2={Magma_CSR},
                   A_d={Magma_CSR}, B_d={Magma_CSR}, C_d={Magma_CSR};

    float one = MAGMA_S_MAKE(1.0, 0.0);
    float mone = MAGMA_S_MAKE(-1.0, 0.0);

    // index of the command-line argument currently being consumed
    magma_int_t i=1;

    // ---- first matrix (A) ----
    // argv[argc] is a null pointer, so argv[i] must not reach strcmp or the
    // matrix reader unless i < argc.
    if ( i >= argc ) {
        printf("error: missing first matrix argument.\n"
               "usage: %s MATRIX_A MATRIX_B   (MATRIX = <file> | LAPLACE2D <size>)\n",
               (argc > 0 ? argv[0] : "tester") );
        info = -1;
        goto cleanup;
    }
    if ( i+1 < argc && strcmp("LAPLACE2D", argv[i]) == 0 ) {   // Laplace test
        i++;
        magma_int_t laplace_size = atoi( argv[i] );
        CHECK( magma_sm_5stencil( laplace_size, &A, queue ));
    } else {                                                   // file-matrix test
        CHECK( magma_s_csr_mtx( &A, argv[i], queue ));
    }
    printf("%% matrix info: %d-by-%d with %d nonzeros\n",
            int(A.num_rows), int(A.num_cols), int(A.nnz) );
    i++;

    // ---- second matrix (B) ----
    if ( i >= argc ) {
        printf("error: missing second matrix argument.\n"
               "usage: %s MATRIX_A MATRIX_B   (MATRIX = <file> | LAPLACE2D <size>)\n",
               (argc > 0 ? argv[0] : "tester") );
        info = -1;
        goto cleanup;
    }
    if ( i+1 < argc && strcmp("LAPLACE2D", argv[i]) == 0 ) {   // Laplace test
        i++;
        magma_int_t laplace_size = atoi( argv[i] );
        CHECK( magma_sm_5stencil( laplace_size, &B, queue ));
    } else {                                                   // file-matrix test
        CHECK( magma_s_csr_mtx( &B, argv[i], queue ));
    }
    printf("%% matrix info: %d-by-%d with %d nonzeros\n",
            int(B.num_rows), int(B.num_cols), int(B.nnz) );

    // copy both operands to the device
    CHECK( magma_smtransfer( A, &A_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_smtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));

    // first pass writes C_d; B_d is then released so the second pass can
    // write its result into it
    CHECK( magma_scuspaxpy( &one, A_d, &one, B_d, &C_d, queue ));
    magma_smfree(&B_d, queue );
    CHECK( magma_scuspaxpy( &mone, A_d, &one, C_d, &B_d, queue ));

    // bring the round-tripped matrix back to the host
    CHECK( magma_smtransfer( B_d, &B2, Magma_DEV, Magma_CPU, queue ));

    magma_smfree(&A_d, queue );
    magma_smfree(&B_d, queue );
    magma_smfree(&C_d, queue );

    // check difference
    CHECK( magma_smdiff( B, B2, &res, queue ));
    printf("%% ||A-B||_F = %8.2e\n", res);
    if ( res < .000001 )
        printf("%% tester matrix add: ok\n");
    else
        printf("%% tester matrix add: failed\n");

    magma_smfree(&A, queue );
    magma_smfree(&B, queue );
    magma_smfree(&B2, queue );

cleanup:
    // NOTE(review): on the success path every matrix below has already been
    // freed once above; this relies on magma_smfree being safe to call on an
    // already-freed matrix -- confirm.
    magma_smfree(&A_d, queue );
    magma_smfree(&B_d, queue );
    magma_smfree(&C_d, queue );
    magma_smfree(&A, queue );
    magma_smfree(&B, queue );
    magma_smfree(&B2, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
示例7: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zpotf2_gpu
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
magmaDoubleComplex *h_A, *h_R;
magmaDoubleComplex *d_A;
magma_int_t N, n2, lda, ldda, info;
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
double work[1], error;
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
printf(" N CPU GFlop/s (ms) GPU GFlop/s (ms) ||R_magma - R_lapack||_F / ||R_lapack||_F\n");
printf("========================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[i];
lda = N;
n2 = lda*N;
ldda = ((N+31)/32)*32;
gflops = FLOPS_ZPOTRF( N ) / 1e9;
TESTING_MALLOC( h_A, magmaDoubleComplex, n2 );
TESTING_HOSTALLOC( h_R, magmaDoubleComplex, n2 );
TESTING_DEVALLOC( d_A, magmaDoubleComplex, ldda*N );
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
magma_zmake_hpd( N, h_A, lda );
lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_zpotf2_gpu( opts.uplo, N, d_A, ldda, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_zpotf2_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.lapack ) {
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_zpotrf( &opts.uplo, &N, h_A, &lda, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zpotrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
magma_zgetmatrix( N, N, d_A, ldda, h_R, lda );
error = lapackf77_zlange("f", &N, &N, h_A, &lda, work);
blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_zlange("f", &N, &N, h_R, &lda, work) / error;
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
(int) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000., error );
}
else {
printf("%5d --- ( --- ) %7.2f (%7.2f) --- \n",
(int) N, gpu_perf, gpu_time*1000. );
}
TESTING_FREE( h_A );
TESTING_HOSTFREE( h_R );
TESTING_DEVFREE( d_A );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return 0;
}
示例8: main
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver

   Parses solver options with magma_zparse_opts, then treats every remaining
   command-line argument as a test matrix: either a file name handed to
   magma_z_csr_mtx, or the two arguments "LAPLACE2D <size>", which build the
   matrix with magma_zm_5stencil. For each matrix it applies the requested
   scaling and format conversion, copies the matrix to the device, and runs
   magma_z_solver with a right-hand side initialised to all ones and an initial
   guess of all zeros, then prints the solver report.

   A solver failure is reported and the loop moves on to the next matrix.
   Returns the value info holds at exit (CHECK is defined elsewhere; presumably
   it stores a failing return code in info and jumps to the cleanup label).
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_zopts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);
    magma_z_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR};
    magma_z_matrix x={Magma_CSR}, b={Magma_CSR};

    // index of the next unread argument; advanced by magma_zparse_opts
    int i=1;
    CHECK( magma_zparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    // a preconditioner is kept only for the solvers listed here
    if ( zopts.solver_par.solver != Magma_PCG &&
         zopts.solver_par.solver != Magma_PGMRES &&
         zopts.solver_par.solver != Magma_PBICGSTAB &&
         zopts.solver_par.solver != Magma_ITERREF &&
         zopts.solver_par.solver != Magma_LOBPCG )
        zopts.precond_par.solver = Magma_NONE;

    CHECK( magma_zsolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue ));

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_zm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            CHECK( magma_z_csr_mtx( &A, argv[i], queue ));
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // for the eigensolver case
        zopts.solver_par.ev_length = A.num_rows;
        CHECK( magma_zeigensolverinfo_init( &zopts.solver_par, queue ));

        // scale matrix
        CHECK( magma_zmscale( &A, zopts.scaling, queue ));

        CHECK( magma_zmconvert( A, &B, Magma_CSR, zopts.output_format, queue ));
        CHECK( magma_zmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));

        // vectors and initial guess
        CHECK( magma_zvinit( &b, Magma_DEV, A.num_cols, 1, one, queue ));
        //magma_zvinit( &x, Magma_DEV, A.num_cols, 1, one, queue );
        //magma_z_spmv( one, B_d, x, zero, b, queue );                 // b = A x
        //magma_zmfree(&x, queue );
        CHECK( magma_zvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue ));

        info = magma_z_solver( B_d, b, &x, &zopts, queue );
        if( info != 0 ){
            // magma_int_t may be wider than int, so cast it for %d -- the same
            // convention the other testers use when printing info
            printf("error: solver returned: %s (%d).\n",
                   magma_strerror( info ), (int) info );
        }
        magma_zsolverinfo( &zopts.solver_par, &zopts.precond_par, queue );

        magma_zmfree(&B_d, queue );
        magma_zmfree(&B, queue );
        magma_zmfree(&A, queue );
        magma_zmfree(&x, queue );
        magma_zmfree(&b, queue );

        i++;
    }

cleanup:
    // NOTE(review): after a normal loop exit these matrices were already freed
    // at the end of the last iteration; this relies on magma_zmfree being safe
    // to call on an already-freed matrix -- confirm.
    magma_zmfree(&B_d, queue );
    magma_zmfree(&B, queue );
    magma_zmfree(&A, queue );
    magma_zmfree(&x, queue );
    magma_zmfree(&b, queue );
    magma_zsolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
示例9: main
//.........这里部分代码省略.........
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_cset_pointer( dA_array, dA_magma, ldda, 0, 0, ldda*N, batchCount, opts.queue );
magma_time = magma_sync_wtime( opts.queue );
info = magma_cgetrf_nopiv_batched( M, N, dA_array, ldda, dinfo_magma, batchCount, opts.queue);
magma_time = magma_sync_wtime( opts.queue ) - magma_time;
magma_perf = gflops / magma_time;
// check correctness of results through "dinfo_magma" and correctness of arguments through "info"
magma_getvector( batchCount, sizeof(magma_int_t), dinfo_magma, 1, cpu_info, 1);
for (int i=0; i < batchCount; i++)
{
if (cpu_info[i] != 0 ) {
printf("magma_cgetrf_batched matrix %d returned internal error %d\n", i, (int)cpu_info[i] );
}
}
if (info != 0) {
printf("magma_cgetrf_batched returned argument error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
for (int i=0; i < batchCount; i++) {
lapackf77_cgetrf(&M, &N, h_A + i*lda*N, &lda, ipiv + i * min_mn, &info);
assert( info == 0 );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0) {
printf("lapackf77_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
}
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%10d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f)",
(int) batchCount, (int) M, (int) N, cpu_perf, cpu_time*1000., magma_perf, magma_time*1000., cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
}
else {
printf("%10d %5d %5d --- ( --- ) %7.2f (%7.2f) %7.2f (%7.2f)",
(int) batchCount, (int) M, (int) N, magma_perf, magma_time*1000., cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
}
if ( opts.check ) {
// initialize ipiv to 1, 2, 3, ...
for (int i=0; i < batchCount; i++)
{
for (int k=0; k < min_mn; k++) {
ipiv[i*min_mn+k] = k+1;
}
}
magma_cgetmatrix( M, N*batchCount, dA_magma, ldda, h_A, lda );
error = 0;
for (int i=0; i < batchCount; i++)
{
float err;
err = get_LU_error( M, N, h_R + i * lda*N, lda, h_A + i * lda*N, ipiv + i * min_mn);
if ( isnan(err) || isinf(err) ) {
error = err;
break;
}
error = max( err, error );
}
bool okay = (error < tol);
status += ! okay;
printf(" %8.2e %s\n", error, (okay ? "ok" : "failed") );
}
else {
printf(" --- \n");
}
TESTING_FREE_CPU( cpu_info );
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( dA_magma );
TESTING_FREE_DEV( dinfo_magma );
TESTING_FREE_DEV( dipiv_magma );
TESTING_FREE_DEV( dipiv_array );
TESTING_FREE_DEV( dA_array );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
opts.cleanup();
TESTING_FINALIZE();
return status;
}
示例10: main
//.........这里部分代码省略.........
=================================================================== */
gpu_time = magma_wtime();
magma_cgehrd( N, ione, N, h_R, lda, tau, h_work, lwork, dT, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgehrd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.check ) {
ltwork = 2*(N*N);
TESTING_MALLOC_PIN( h_Q, magmaFloatComplex, lda*N );
TESTING_MALLOC_CPU( twork, magmaFloatComplex, ltwork );
#if defined(PRECISION_z) || defined(PRECISION_c)
TESTING_MALLOC_CPU( rwork, float, N );
#endif
lapackf77_clacpy(MagmaUpperLowerStr, &N, &N, h_R, &lda, h_Q, &lda);
for( int j = 0; j < N-1; ++j )
for( int i = j+2; i < N; ++i )
h_R[i+j*lda] = MAGMA_C_ZERO;
magma_cunghr(N, ione, N, h_Q, lda, tau, dT, nb, &info);
if (info != 0) {
printf("magma_cunghr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
exit(1);
}
#if defined(PRECISION_z) || defined(PRECISION_c)
lapackf77_chst01(&N, &ione, &N,
h_A, &lda, h_R, &lda,
h_Q, &lda, twork, &ltwork, rwork, result);
#else
lapackf77_chst01(&N, &ione, &N,
h_A, &lda, h_R, &lda,
h_Q, &lda, twork, &ltwork, result);
#endif
TESTING_FREE_PIN( h_Q );
TESTING_FREE_CPU( twork );
#if defined(PRECISION_z) || defined(PRECISION_c)
TESTING_FREE_CPU( rwork );
#endif
}
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_cgehrd(&N, &ione, &N, h_R, &lda, tau, h_work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cgehrd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* =====================================================================
Print performance and error.
=================================================================== */
if ( opts.lapack ) {
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d --- ( --- ) %7.2f (%7.2f)",
(int) N, gpu_perf, gpu_time );
}
if ( opts.check ) {
printf(" %8.2e %8.2e %s\n",
result[0]*eps, result[1]*eps,
( ( (result[0]*eps < tol) && (result[1]*eps < tol) ) ? "ok" : "failed") );
status += ! (result[0]*eps < tol);
status += ! (result[1]*eps < tol);
}
else {
printf(" --- ---\n");
}
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( tau );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( h_work );
TESTING_FREE_DEV( dT );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例11: main
//.........这里部分代码省略.........
#ifdef REAL
tol2 = 0;
#endif
}
bool okay; okay = (error <= tol2);
status += ! okay;
mkl_warning |= ! okay;
/* ====================================================================
Check for NAN and INF propagation
=================================================================== */
#define h_A(i_, j_) (h_A + (i_) + (j_)*lda)
#define d_A(i_, j_) (d_A + (i_) + (j_)*ldda)
i = rand() % N;
j = rand() % N;
magma_int_t tmp;
if ( uplo[iuplo] == MagmaLower && i < j ) {
tmp = i;
i = j;
j = tmp;
}
else if ( uplo[iuplo] == MagmaUpper && i > j ) {
tmp = i;
i = j;
j = tmp;
}
*h_A(i,j) = MAGMA_C_NAN;
magma_csetvector( 1, h_A(i,j), 1, d_A(i,j), 1 );
norm_magma = magmablas_clanhe( norm[inorm], uplo[iuplo], N, d_A, ldda, d_work, N );
norm_lapack = lapackf77_clanhe( lapack_norm_const( norm[inorm] ),
lapack_uplo_const( uplo[iuplo] ),
&N, h_A, &lda, h_work );
bool nan_okay; nan_okay = isnan(norm_magma);
bool la_nan_okay; la_nan_okay = isnan(norm_lapack);
lapack_nan_fail += ! la_nan_okay;
status += ! nan_okay;
*h_A(i,j) = MAGMA_C_INF;
magma_csetvector( 1, h_A(i,j), 1, d_A(i,j), 1 );
norm_magma = magmablas_clanhe( norm[inorm], uplo[iuplo], N, d_A, ldda, d_work, N );
norm_lapack = lapackf77_clanhe( lapack_norm_const( norm[inorm] ),
lapack_uplo_const( uplo[iuplo] ),
&N, h_A, &lda, h_work );
bool inf_okay; inf_okay = isinf(norm_magma);
bool la_inf_okay; la_inf_okay = isinf(norm_lapack);
lapack_inf_fail += ! la_inf_okay;
status += ! inf_okay;
#ifdef MAGMA_WITH_MKL
if ( mkl_single_thread ) {
// end single thread to work around MKL bug
magma_set_lapack_numthreads( la_threads );
}
#endif
printf("%5d %4c %4c %7.2f (%7.2f) %7.2f (%7.2f) %#9.3g %-6s %6s%1s %6s%1s\n",
(int) N,
lapacke_norm_const( norm[inorm] ),
lapacke_uplo_const( uplo[iuplo] ),
cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
error,
(okay ? "ok" : "failed"),
(nan_okay ? "ok" : "failed"), (la_nan_okay ? " " : "*"),
(inf_okay ? "ok" : "failed"), (la_inf_okay ? " " : "*"));
cleanup:
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_work );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_work );
fflush( stdout );
} // end iter
if ( opts.niter > 1 ) {
printf( "\n" );
}
}} // end iuplo, inorm
printf( "\n" );
}
// don't print "failed" here because then run_tests.py thinks MAGMA failed
if ( lapack_nan_fail ) {
printf( "* Warning: LAPACK did not pass NAN propagation test; upgrade to LAPACK version >= 3.4.2 (Sep. 2012)\n" );
}
if ( lapack_inf_fail ) {
printf( "* Warning: LAPACK did not pass INF propagation test\n" );
}
if ( mkl_warning ) {
printf("* MKL (e.g., 11.1) has a bug in clanhe with multiple threads;\n"
" corrected in 11.2 for one, inf, max norms, but still in Frobenius norm.\n"
" Try again with MKL_NUM_THREADS=1.\n" );
}
opts.cleanup();
TESTING_FINALIZE();
return status;
}
示例12: main
//......... part of the code is omitted here .........
// Tail of a batched CGEMM test driver: run the CPU reference, compare the
// MAGMA and cuBLAS results, print one result line, and release the buffers.
// All variables used below are declared in the omitted part of the function.
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
// Time the whole batch as one interval: one blasf77_cgemm call per entry.
cpu_time = magma_wtime();
for(int i=0; i<batchCount; i++)
{
// Entry i of each operand starts i*(leading dimension * column count)
// elements into its host buffer. An and Bn are presumably the column
// counts of A and B for the chosen transposes -- TODO confirm.
blasf77_cgemm(
lapack_trans_const(opts.transA), lapack_trans_const(opts.transB),
&M, &N, &K,
&alpha, h_A + i*lda*An, &lda,
h_B + i*ldb*Bn, &ldb,
&beta, h_C + i*ldc*N, &ldc );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
}
/* =====================================================================
Check the result
=================================================================== */
if ( opts.lapack ) {
// compute relative error for both magma & cublas, relative to lapack,
// |C_magma - C_lapack| / |C_lapack|
// The reported value is the largest relative error over all batch entries.
magma_error = 0.0;
cublas_error = 0.0;
for(int s=0; s<batchCount; s++)
{
// Number of elements in one ldc-by-N result matrix.
magma_int_t C_batchSize = ldc * N;
// "M" selects the max-abs-element norm in LAPACK's clange.
Cnorm = lapackf77_clange( "M", &M, &N, h_C + s*C_batchSize, &ldc, work );
// h_Cmagma += c_neg_one * h_C, i.e. the difference to the CPU reference
// (assuming c_neg_one is -1). This overwrites the MAGMA result in place.
blasf77_caxpy( &C_batchSize, &c_neg_one, h_C + s*C_batchSize, &ione, h_Cmagma + s*C_batchSize, &ione );
magma_err = lapackf77_clange( "M", &M, &N, h_Cmagma + s*C_batchSize, &ldc, work ) / Cnorm;
// A NaN/Inf error is reported as-is and ends the loop at once, so the
// cuBLAS error for this entry and all later entries is not evaluated.
if ( isnan(magma_err) || isinf(magma_err) ) {
magma_error = magma_err;
break;
}
magma_error = max(fabs(magma_err), magma_error);
// Same comparison for the cuBLAS result.
blasf77_caxpy( &C_batchSize, &c_neg_one, h_C + s*C_batchSize, &ione, h_Ccublas + s*C_batchSize, &ione );
cublas_err = lapackf77_clange( "M", &M, &N, h_Ccublas + s*C_batchSize, &ldc, work ) / Cnorm;
if ( isnan(cublas_err) || isinf(cublas_err) ) {
cublas_error = cublas_err;
break;
}
cublas_error = max(fabs(cublas_err), cublas_error);
}
// Columns: batch count, M, N, K, then perf (time in ms) for MAGMA, cuBLAS
// and CPU, then the MAGMA and cuBLAS errors.
printf("%10d %5d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %8.2e \n",
(int) batchCount, (int) M, (int) N, (int) K,
magma_perf, 1000.*magma_time,
cublas_perf, 1000.*cublas_time,
cpu_perf, 1000.*cpu_time,
magma_error, cublas_error);
}
else {
// compute relative error for magma, relative to cublas
// Here the whole batch is treated as a single M-by-NN matrix of sizeC
// elements. NN and sizeC come from the omitted part (presumably
// N*batchCount and ldc*NN -- TODO confirm).
Cnorm = lapackf77_clange( "M", &M, &NN, h_Ccublas, &ldc, work );
blasf77_caxpy( &sizeC, &c_neg_one, h_Ccublas, &ione, h_Cmagma, &ione );
magma_error = lapackf77_clange( "M", &M, &NN, h_Cmagma, &ldc, work ) / Cnorm;
// Same layout as above with the CPU and cuBLAS-error columns dashed out.
printf("%10d %5d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) --- ( --- ) %8.2e ---\n",
(int) batchCount, (int) M, (int) N, (int) K,
magma_perf, 1000.*magma_time,
cublas_perf, 1000.*cublas_time,
magma_error );
}
// Release host and device buffers allocated in the omitted part.
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( h_C );
TESTING_FREE_CPU( h_Cmagma );
TESTING_FREE_CPU( h_Ccublas );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
TESTING_FREE_DEV( d_C );
TESTING_FREE_DEV( A_array );
TESTING_FREE_DEV( B_array );
TESTING_FREE_DEV( C_array );
fflush( stdout);
}
// The closing braces here end loops opened in the omitted part
// (presumably the per-iteration and per-size loops).
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
// NOTE(review): nothing in the visible part modifies status, so the errors
// computed above do not influence the return value -- confirm this is intended.
TESTING_FINALIZE();
return status;
}
示例13: main
//.........这里部分代码省略.........
TESTING_MALLOC_CPU( h_xcublas, magmaDoubleComplex, N );
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
TESTING_MALLOC_DEV( d_x, magmaDoubleComplex, N );
/* Initialize the matrices */
/* Factor A into LU to get well-conditioned triangular matrix.
* Copy L to U, since L seems okay when used with non-unit diagonal
* (i.e., from U), while U fails when used with unit diagonal. */
lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_zgetrf( &N, &N, h_A, &lda, ipiv, &info );
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < j; ++i ) {
*h_A(i,j) = *h_A(j,i);
}
}
lapackf77_zlarnv( &ione, ISEED, &N, h_b );
blasf77_zcopy( &N, h_b, &ione, h_x, &ione );
/* =====================================================================
Performs operation using CUBLAS
=================================================================== */
magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
magma_zsetvector( N, h_x, 1, d_x, 1 );
cublas_time = magma_sync_wtime( NULL );
cublasZtrsv( opts.handle, cublas_uplo_const(opts.uplo),
cublas_trans_const(opts.transA), cublas_diag_const(opts.diag),
N,
d_A, ldda,
d_x, 1 );
cublas_time = magma_sync_wtime( NULL ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_zgetvector( N, d_x, 1, h_xcublas, 1 );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
blasf77_ztrsv( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), lapack_diag_const(opts.diag),
&N,
h_A, &lda,
h_x, &ione );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
}
/* =====================================================================
Check the result
=================================================================== */
// ||b - Ax|| / (||A||*||x||)
// error for CUBLAS
normA = lapackf77_zlange( "F", &N, &N, h_A, &lda, work );
normx = lapackf77_zlange( "F", &N, &ione, h_xcublas, &ione, work );
blasf77_ztrmv( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), lapack_diag_const(opts.diag),
&N,
h_A, &lda,
h_xcublas, &ione );
blasf77_zaxpy( &N, &c_neg_one, h_b, &ione, h_xcublas, &ione );
normr = lapackf77_zlange( "F", &N, &ione, h_xcublas, &N, work );
cublas_error = normr / (normA*normx);
if ( opts.lapack ) {
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cpu_perf, 1000.*cpu_time,
cublas_error, (cublas_error < tol ? "ok" : "failed"));
status += ! (cublas_error < tol);
}
else {
printf("%5d %7.2f (%7.2f) --- ( --- ) %8.2e %s\n",
(int) N,
cublas_perf, 1000.*cublas_time,
cublas_error, (cublas_error < tol ? "ok" : "failed"));
status += ! (cublas_error < tol);
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_b );
TESTING_FREE_CPU( h_x );
TESTING_FREE_CPU( h_xcublas );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_x );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例14: main
//......... part of the code is omitted here .........
// Tail of a mixed-precision least-squares (zcgeqrsv) test driver. The
// mixed-precision solve itself happens in the omitted part; what follows
// computes its residual, times pure double and pure single precision GPU
// solves and the LAPACK solve, then prints and cleans up.
// NOTE(review): the printf below is presumably the body of an
// `if (info != 0)` whose header is in the omitted part -- confirm.
printf("magma_zcgeqrsv returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// compute the residual
// h_R := c_one*h_R + c_neg_one * h_A * h_X. h_R presumably holds a copy of B
// made in the omitted part, giving B - A*X -- TODO confirm.
magma_zgetmatrix( N, nrhs, d_X, lddx, h_X, ldb );
blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A, &lda,
h_X, &ldb,
&c_one, h_R, &ldb);
// Frobenius norm of A, used to scale all errors below.
Anorm = lapackf77_zlange("f", &M, &N, h_A, &lda, work);
//=====================================================================
// Double Precision Solve
//=====================================================================
// Re-upload A and B, then time a plain double-complex solve. Only the
// timing is used; the solution is not checked.
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
gpu_time = magma_wtime();
magma_zgels_gpu( MagmaNoTrans, M, N, nrhs, d_A, ldda,
d_B, lddb, h_workd, lworkgpu, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perfd = gflops / gpu_time;
//=====================================================================
// Single Precision Solve
//=====================================================================
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_zsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
/* d_SA and d_SB are allocated here, rather than earlier, to avoid
* doubling the GPU memory used during zcgeqrsv */
TESTING_MALLOC_DEV( d_SA, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( d_SB, magmaFloatComplex, lddb*nrhs );
// Convert A and B from double complex to single complex on the device.
// NOTE(review): B was uploaded as M-by-nrhs, but only N rows are converted
// here; when M > N the remaining rows of d_SB look uninitialized -- confirm.
magmablas_zlag2c( M, N, d_A, ldda, d_SA, ldda, &info );
magmablas_zlag2c( N, nrhs, d_B, lddb, d_SB, lddb, &info );
gpu_time = magma_wtime();
magma_cgels_gpu( MagmaNoTrans, M, N, nrhs, d_SA, ldda,
d_SB, lddb, h_works, lhwork, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perfs = gflops / gpu_time;
TESTING_FREE_DEV( d_SA );
TESTING_FREE_DEV( d_SB );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
// Copy B into h_X, which zgels then works on in place.
lapackf77_zlacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_X, &ldb );
cpu_time = magma_wtime();
// NOTE(review): the workspace is h_workd but the length passed is lhwork,
// whereas the double-precision GPU call pairs h_workd with lworkgpu --
// confirm h_workd holds at least lhwork elements.
lapackf77_zgels( MagmaNoTransStr, &M, &N, &nrhs,
h_A, &lda, h_X, &ldb, h_workd, &lhwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zgels returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// h_B := c_one*h_B + c_neg_one * h_A2 * h_X, the LAPACK residual. h_A2 is
// used instead of h_A, presumably because it is an unmodified copy of A
// -- TODO confirm.
blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
&c_neg_one, h_A2, &lda,
h_X, &ldb,
&c_one, h_B, &ldb );
// Scaled residual norms: h_B holds the LAPACK residual, h_R the GPU one.
cpu_error = lapackf77_zlange("f", &M, &nrhs, h_B, &ldb, work) / (min_mn*Anorm);
gpu_error = lapackf77_zlange("f", &M, &nrhs, h_R, &ldb, work) / (min_mn*Anorm);
// error relative to LAPACK
// h_R := h_R + c_neg_one * h_B over M*nrhs contiguous elements, i.e. the
// difference of the two residuals. This pass/fail value is compared to tol.
size = M*nrhs;
blasf77_zaxpy( &size, &c_neg_one, h_B, &ione, h_R, &ione );
error = lapackf77_zlange("f", &M, &nrhs, h_R, &ldb, work) / (min_mn*Anorm);
// gpu_perf and qrsv_iters are set in the omitted part.
printf("%5d %5d %5d %7.2f %7.2f %7.2f %7.2f %4d %8.2e %8.2e %8.2e %s\n",
(int) M, (int) N, (int) nrhs,
cpu_perf, gpu_perfd, gpu_perfs, gpu_perf,
(int) qrsv_iters,
cpu_error, gpu_error, error, (error < tol ? "ok" : "failed"));
status += ! (error < tol);
// Release host and device buffers allocated in the omitted part.
// NOTE(review): h_works is not freed here; presumably it aliases h_workd
// -- confirm.
TESTING_FREE_CPU( tau );
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_A2 );
TESTING_FREE_CPU( h_B );
TESTING_FREE_CPU( h_X );
TESTING_FREE_CPU( h_R );
TESTING_FREE_CPU( h_workd );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_B );
TESTING_FREE_DEV( d_X );
TESTING_FREE_DEV( d_T );
fflush( stdout );
}
// The closing braces here end loops opened in the omitted part
// (presumably the per-iteration and per-size loops).
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例15: main
//......... part of the code is omitted here .........
// Tail of a batched CPOTRF test driver: factor batchCount Hermitian positive
// definite N-by-N matrices with MAGMA, optionally factor them with LAPACK,
// compare the factors, print one result line, and release the buffers.
// All variables not declared below come from the omitted part.
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2);
TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2);
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda * N * batchCount);
TESTING_MALLOC_DEV( dinfo_magma, magma_int_t, batchCount);
// NOTE(review): unlike the TESTING_MALLOC_* calls above, the result of this
// allocation is not checked.
magma_malloc((void**)&d_A_array, batchCount * sizeof(*d_A_array));

/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
for(int i=0; i<batchCount; i++)
{
    // Matrix i occupies lda*N consecutive elements of h_A.
    magma_cmake_hpd( N, h_A + i * lda * N, lda );// need modification
}
// The batch is stored side by side, so it can be copied as one
// N-by-(N*batchCount) matrix.
magma_int_t columns = N * batchCount;
lapackf77_clacpy( MagmaUpperLowerStr, &N, &(columns), h_A, &lda, h_R, &lda );
magma_csetmatrix( N, columns, h_A, lda, d_A, ldda );

/* ====================================================================
   Performs operation using MAGMA
   =================================================================== */
cset_pointer(d_A_array, d_A, ldda, 0, 0, ldda * N, batchCount, queue);
gpu_time = magma_sync_wtime(NULL);
info = magma_cpotrf_batched( opts.uplo, N, d_A_array, ldda, dinfo_magma, batchCount, queue);
gpu_time = magma_sync_wtime(NULL) - gpu_time;
gpu_perf = gflops / gpu_time;

// Fetch the per-matrix info codes. The buffer is allocated with the same
// checked macro as the other host buffers instead of a raw, unchecked malloc.
magma_int_t *cpu_info = NULL;
TESTING_MALLOC_CPU( cpu_info, magma_int_t, batchCount );
magma_getvector( batchCount, sizeof(magma_int_t), dinfo_magma, 1, cpu_info, 1);
for(int i=0; i<batchCount; i++)
{
    if(cpu_info[i] != 0 ){
        printf("magma_cpotrf_batched matrix %d returned internal error %d\n",i, (int)cpu_info[i] );
    }
}
if (info != 0)
    printf("magma_cpotrf_batched returned argument error %d: %s.\n", (int) info, magma_strerror( info ));

if ( opts.lapack ) {
    /* =====================================================================
       Performs operation using LAPACK
       =================================================================== */
    // Keep the first nonzero info: `info` is overwritten on every call, so
    // checking it only after the loop would hide all but the last failure.
    magma_int_t lapack_info = 0;
    cpu_time = magma_wtime();
    for(int i=0; i<batchCount; i++)
    {
        lapackf77_cpotrf( lapack_uplo_const(opts.uplo), &N, h_A + i * lda * N, &lda, &info );
        if ( info != 0 && lapack_info == 0 ) {
            lapack_info = info;
        }
    }
    cpu_time = magma_wtime() - cpu_time;
    cpu_perf = gflops / cpu_time;
    if (lapack_info != 0)
        printf("lapackf77_cpotrf returned error %d: %s.\n",
               (int) lapack_info, magma_strerror( lapack_info ));

    /* =====================================================================
       Check the result compared to LAPACK
       =================================================================== */
    magma_cgetmatrix( N, columns, d_A, ldda, h_R, lda );
    magma_int_t NN = lda*N;
    // Compare the triangle that was actually factored. A hard-coded 'l'
    // would look at the untouched triangle when opts.uplo selects upper.
    const char *uplo = lapack_uplo_const(opts.uplo);
    // Worst relative difference |R_magma - R_lapack| / |R_lapack| over the batch.
    float err = 0.0;
    for(int i=0; i<batchCount; i++)
    {
        error = lapackf77_clanhe("f", uplo, &N, h_A + i * lda*N, &lda, work);
        blasf77_caxpy(&NN, &c_neg_one, h_A + i * lda*N, &ione, h_R + i * lda*N, &ione);
        error = lapackf77_clanhe("f", uplo, &N, h_R + i * lda*N, &lda, work) / error;
        // A NaN/Inf error is reported as-is and ends the scan.
        if ( isnan(error) || isinf(error) ) {
            err = error;
            break;
        }
        err = max(fabs(error),err);
    }
    // The verdict is based on the batch-wide worst case `err`, the value that
    // is printed, and not on `error`, which only reflects the last matrix.
    const bool okay = (err < tol);
    printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
           (int)batchCount, (int) N, cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000., err, (okay ? "ok" : "failed"));
    status += ! okay;
}
else {
    // Same layout with the CPU and error columns dashed out.
    printf("%5d %5d --- ( --- ) %7.2f (%7.2f) --- \n",
           (int)batchCount, (int) N, gpu_perf, gpu_time*1000. );
}

TESTING_FREE_CPU( h_A );
TESTING_FREE_PIN( h_R );
TESTING_FREE_DEV( d_A );
TESTING_FREE_DEV( d_A_array );
TESTING_FREE_DEV( dinfo_magma );
TESTING_FREE_CPU( cpu_info );
fflush( stdout );
}
// The closing braces here end loops opened in the omitted part
// (presumably the per-iteration and per-size loops).
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}