本文整理汇总了 C++ 中 TESTING_INIT 函数的典型用法代码示例。如果您正苦于以下问题：C++ TESTING_INIT 函数的具体用法？C++ TESTING_INIT 怎么用？C++ TESTING_INIT 使用的例子？那么，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 TESTING_INIT 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing slaset_band
Code is very similar to testing_slacpy.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
#define h_A(i_,j_) (h_A + (i_) + (j_)*lda)
#define d_A(i_,j_) (d_A + (i_) + (j_)*ldda)
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
float c_neg_one = MAGMA_S_NEG_ONE;
float *h_A, *h_R;
float *d_A;
float offdiag = MAGMA_S_MAKE( 1.2000, 6.7000 );
float diag = MAGMA_S_MAKE( 3.1415, 2.7183 );
magma_int_t M, N, nb, cnt, size, lda, ldb, ldda;
magma_int_t ione = 1;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
nb = (opts.nb == 0 ? 32 : opts.nb);
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };
printf("K = nb = %d\n", (int) nb );
printf("uplo M N CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("==================================================================\n");
for( int iuplo = 0; iuplo < 2; ++iuplo ) {
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
int inset = 0;
M = opts.msize[itest] + 2*inset;
N = opts.nsize[itest] + 2*inset;
lda = M;
ldb = lda;
ldda = ((M+31)/32)*32;
size = lda*N;
TESTING_MALLOC_CPU( h_A, float, size );
TESTING_MALLOC_CPU( h_R, float, size );
TESTING_MALLOC_DEV( d_A, float, ldda*N );
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_A[i + j*lda] = MAGMA_S_MAKE( i + j/10000., j );
}
}
magma_ssetmatrix( M, N, h_A, lda, d_A, ldda );
/* =====================================================================
Performs operation on CPU
Also count number of elements touched.
=================================================================== */
cpu_time = magma_wtime();
cnt = 0;
for( int j=inset; j < N-inset; ++j ) {
for( int k=0; k < nb; ++k ) { // set k-th sub- or super-diagonal
if ( k == 0 && j < M-inset ) {
*h_A(j,j) = diag;
cnt += 1;
}
else if ( uplo[iuplo] == MagmaLower && j+k < M-inset ) {
*h_A(j+k,j) = offdiag;
cnt += 1;
}
else if ( uplo[iuplo] == MagmaUpper && j-k >= inset && j-k < M-inset ) {
*h_A(j-k,j) = offdiag;
cnt += 1;
}
}
}
gbytes = cnt / 1e9;
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_sync_wtime( 0 );
int mm = M - 2*inset;
int nn = N - 2*inset;
magmablas_slaset_band( uplo[iuplo], mm, nn, nb, offdiag, diag, d_A(inset,inset), ldda );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Check the result
=================================================================== */
//.........这里部分代码省略.........
示例2: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zlaset
Code is very similar to testing_zlacpy.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
double error, work[1];
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *h_A, *h_R;
magmaDoubleComplex_ptr d_A;
magmaDoubleComplex offdiag, diag;
magma_int_t M, N, size, lda, ldda;
magma_int_t ione = 1;
magma_int_t status = 0;
magma_opts opts;
opts.parse_opts( argc, argv );
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };
printf("%% uplo M N offdiag diag CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("%%===================================================================================\n");
for( int iuplo = 0; iuplo < 3; ++iuplo ) {
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
for( int ival = 0; ival < 4; ++ival ) {
// test combinations of zero & non-zero:
// ival offdiag diag
// 0 0 0
// 1 0 3.14
// 2 1.23 0
// 3 1.23 3.14
offdiag = MAGMA_Z_MAKE( 1.2345, 6.7890 ) * (ival / 2);
diag = MAGMA_Z_MAKE( 3.1415, 2.7183 ) * (ival % 2);
M = opts.msize[itest];
N = opts.nsize[itest];
//M += 2; // space for insets
//N += 2;
lda = M;
ldda = magma_roundup( M, opts.align );
size = lda*N;
if ( uplo[iuplo] == MagmaLower ) {
// save lower trapezoid (with diagonal)
if ( M > N ) {
gbytes = sizeof(magmaDoubleComplex) * (1.*M*N - 0.5*N*(N-1)) / 1e9;
} else {
gbytes = sizeof(magmaDoubleComplex) * 0.5*M*(M+1) / 1e9;
}
}
else if ( uplo[iuplo] == MagmaUpper ) {
// save upper trapezoid (with diagonal)
if ( N > M ) {
gbytes = sizeof(magmaDoubleComplex) * (1.*M*N - 0.5*M*(M-1)) / 1e9;
} else {
gbytes = sizeof(magmaDoubleComplex) * 0.5*N*(N+1) / 1e9;
}
}
else {
// save entire matrix
gbytes = sizeof(magmaDoubleComplex) * 1.*M*N / 1e9;
}
TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, size );
TESTING_MALLOC_CPU( h_R, magmaDoubleComplex, size );
TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_A[i + j*lda] = MAGMA_Z_MAKE( i + j/10000., j );
}
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
magmablasSetKernelStream( opts.queue );
gpu_time = magma_sync_wtime( opts.queue );
//magmablas_zlaset( uplo[iuplo], M-2, N-2, offdiag, diag, d_A+1+ldda, ldda ); // inset by 1 row & col
magmablas_zlaset( uplo[iuplo], M, N, offdiag, diag, d_A, ldda );
gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
//magma_int_t M2 = M-2; // inset by 1 row & col
//magma_int_t N2 = N-2;
//lapackf77_zlaset( lapack_uplo_const( uplo[iuplo] ), &M2, &N2, &offdiag, &diag, h_A+1+lda, &lda );
lapackf77_zlaset( lapack_uplo_const( uplo[iuplo] ), &M, &N, &offdiag, &diag, h_A, &lda );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
//.........这里部分代码省略.........
示例3: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing slansy
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float *h_A;
float *h_work;
magmaFloat_ptr d_A;
magmaFloat_ptr d_work;
magma_int_t N, n2, lda, ldda;
magma_int_t idist = 3; // normal distribution (otherwise max norm is always ~ 1)
magma_int_t ISEED[4] = {0,0,0,1};
float error, norm_magma, norm_lapack;
magma_int_t status = 0;
bool mkl_warning = false;
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper };
magma_norm_t norm[] = { MagmaInfNorm, MagmaOneNorm, MagmaMaxNorm };
// Double-Complex inf-norm not supported on Tesla (CUDA arch 1.x)
#if defined(PRECISION_z)
magma_int_t arch = magma_getdevice_arch();
if ( arch < 200 ) {
printf("!!!! NOTE: Double-Complex %s and %s norm are not supported\n"
"!!!! on CUDA architecture %d; requires arch >= 200.\n"
"!!!! It should report \"parameter number 1 had an illegal value\" below.\n\n",
MagmaInfNormStr, MagmaOneNormStr, (int) arch );
for( int inorm = 0; inorm < 2; ++inorm ) {
for( int iuplo = 0; iuplo < 2; ++iuplo ) {
printf( "Testing that magmablas_slansy( %s, %s, ... ) returns -1 error...\n",
lapack_norm_const( norm[inorm] ),
lapack_uplo_const( uplo[iuplo] ));
norm_magma = magmablas_slansy( norm[inorm], uplo[iuplo], 1, NULL, 1, NULL );
if ( norm_magma != -1 ) {
printf( "expected magmablas_slansy to return -1 error, but got %f\n", norm_magma );
status = 1;
}
}
}
printf( "...return values %s\n\n", (status == 0 ? "ok" : "failed") );
}
#endif
#ifdef MAGMA_WITH_MKL
printf( "\nNote: using single thread to work around MKL slansy bug.\n\n" );
#endif
printf(" N norm uplo CPU GByte/s (ms) GPU GByte/s (ms) error \n");
printf("=======================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int inorm = 0; inorm < 3; ++inorm ) {
for( int iuplo = 0; iuplo < 2; ++iuplo ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
n2 = lda*N;
ldda = roundup( N, opts.roundup );
// read upper or lower triangle
gbytes = 0.5*(N+1)*N*sizeof(float) / 1e9;
TESTING_MALLOC_CPU( h_A, float, n2 );
TESTING_MALLOC_CPU( h_work, float, N );
TESTING_MALLOC_DEV( d_A, float, ldda*N );
TESTING_MALLOC_DEV( d_work, float, N );
/* Initialize the matrix */
lapackf77_slarnv( &idist, ISEED, &n2, h_A );
magma_ssetmatrix( N, N, h_A, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
norm_magma = magmablas_slansy( norm[inorm], uplo[iuplo], N, d_A, ldda, d_work );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gbytes / gpu_time;
if (norm_magma == -1) {
printf( "%5d %4c skipped because it isn't supported on this GPU\n",
(int) N, lapacke_norm_const( norm[inorm] ));
continue;
}
if (norm_magma < 0)
printf("magmablas_slansy returned error %f: %s.\n",
norm_magma, magma_strerror( (int) norm_magma ));
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
#ifdef MAGMA_WITH_MKL
// MKL (11.1.2) has bug in multi-threaded slansy; use single thread to work around
//.........这里部分代码省略.........
示例4: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgetrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
float error;
magmaFloatComplex *h_A;
magma_int_t *ipiv;
magma_int_t M, N, n2, lda, info, min_mn;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
printf("ngpu %d\n", (int) opts.ngpu );
if ( opts.check == 2 ) {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n");
}
else {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)\n");
}
printf("=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
gflops = FLOPS_CGETRF( M, N ) / 1e9;
TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn );
TESTING_MALLOC_PIN( h_A, magmaFloatComplex, n2 );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
init_matrix( M, N, h_A, lda );
cpu_time = magma_wtime();
lapackf77_cgetrf(&M, &N, h_A, &lda, ipiv, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
init_matrix( M, N, h_A, lda );
gpu_time = magma_wtime();
magma_cgetrf( M, N, h_A, lda, ipiv, &info);
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgetrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the factorization
=================================================================== */
if ( opts.lapack ) {
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)",
(int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f)",
(int) M, (int) N, gpu_perf, gpu_time );
}
if ( opts.check == 2 ) {
error = get_residual( M, N, h_A, lda, ipiv );
printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed"));
status += ! (error < tol);
}
else if ( opts.check ) {
error = get_LU_error( M, N, h_A, lda, ipiv );
printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed"));
status += ! (error < tol);
}
else {
printf(" --- \n");
}
TESTING_FREE_CPU( ipiv );
TESTING_FREE_PIN( h_A );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
//.........这里部分代码省略.........
示例5: main
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing the sparse matrix compressor (magma_cmcsrcompressor and
      magma_cmcsrcompressor_gpu).

   For every matrix named on the command line (either a matrix file or
   "LAPLACE2D <size>"), the tester:
     1. scales the matrix and times 10 runs of the CPU compressor,
     2. transposes it, copies it to the device, and times 10 runs of the
        GPU compressor,
     3. copies it back, transposes it back, and prints the difference
        between the original and the round-tripped matrix.

   Returns `info`: 0 unless a CHECK()-wrapped call failed.
   NOTE(review): CHECK is defined outside this block; it presumably stores
   the error code in `info` and jumps to `cleanup` -- confirm against the
   macro definition.
*/
int main( int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_copts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( /*devices[ opts->device ],*/ &queue );

    real_Double_t res;
    magma_c_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR},
                   B={Magma_CSR}, B_d={Magma_CSR};

    // index of the first command-line argument not consumed by the option parser
    int i=1;
    real_Double_t start, end;
    CHECK( magma_cparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    // every remaining argument names one test matrix
    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_cm_5stencil( laplace_size, &A, queue ));
        } else {                        // file-matrix test
            CHECK( magma_c_csr_mtx( &A, argv[i], queue ));
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                (int) A.num_rows,(int) A.num_cols,(int) A.nnz );

        // scale matrix
        CHECK( magma_cmscale( &A, zopts.scaling, queue ));

        // remove nonzeros in matrix (timed over 10 repetitions, CPU version)
        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_cmcsrcompressor( &A, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 );

        // transpose
        CHECK( magma_cmtranspose( A, &AT, queue ));

        // convert, copy back and forth to check everything works
        CHECK( magma_cmconvert( AT, &B, Magma_CSR, Magma_CSR, queue ));
        magma_cmfree(&AT, queue );
        CHECK( magma_cmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));
        magma_cmfree(&B, queue );

        // timed over 10 repetitions, GPU version
        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_cmcsrcompressor_gpu( &B_d, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 );

        CHECK( magma_cmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ));
        magma_cmfree(&B_d, queue );
        CHECK( magma_cmconvert( B, &AT, Magma_CSR, Magma_CSR, queue ));
        magma_cmfree(&B, queue );

        // transpose back
        CHECK( magma_cmtranspose( AT, &A2, queue ));
        magma_cmfree(&AT, queue );

        // compare the original matrix with the round-tripped one
        CHECK( magma_cmdiff( A, A2, &res, queue ));
        printf("# ||A-B||_F = %8.2e\n", res);
        // NOTE(review): a "failed" result is only printed; it does not change
        // the value returned by main.
        if ( res < .000001 )
            printf("# tester matrix compressor: ok\n");
        else
            printf("# tester matrix compressor: failed\n");

        magma_cmfree(&A, queue );
        magma_cmfree(&A2, queue );

        i++;
    }

cleanup:
    // Reached on normal completion and (presumably via CHECK) on any error.
    // Matrices already released inside the loop are freed again here, as the
    // original code did for AT, B, A and A2.
    magma_cmfree(&AT, queue );
    magma_cmfree(&B, queue );
    // B_d holds the device copy; without this it leaks when a CHECK fails
    // between the host-to-device transfer and the in-loop free.
    magma_cmfree(&B_d, queue );
    magma_cmfree(&A, queue );
    magma_cmfree(&A2, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
示例6: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgels
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
float gpu_error, cpu_error, error, Anorm, work[1];
magmaFloatComplex c_one = MAGMA_C_ONE;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_A2, *h_B, *h_X, *h_R, *tau, *h_work, tmp[1];
magmaFloatComplex *d_A, *d_B;
magma_int_t M, N, size, nrhs, lda, ldb, ldda, lddb, min_mn, max_mn, nb, info;
magma_int_t lworkgpu, lhwork, lhwork2;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_int_t status = 0;
float tol = opts.tolerance * lapackf77_slamch("E");
nrhs = opts.nrhs;
printf(" ||b-Ax|| / (N||A||) ||dx-x||/(N||A||)\n");
printf(" M N NRHS CPU GFlop/s (sec) GPU GFlop/s (sec) CPU GPU \n");
printf("===================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
if ( M < N ) {
printf( "%5d %5d %5d skipping because M < N is not yet supported.\n", (int) M, (int) N, (int) nrhs );
continue;
}
min_mn = min(M, N);
max_mn = max(M, N);
lda = M;
ldb = max_mn;
size = lda*N;
ldda = ((M+31)/32)*32;
lddb = ((max_mn+31)/32)*32;
nb = magma_get_cgeqrf_nb(M);
gflops = (FLOPS_CGEQRF( M, N ) + FLOPS_CGEQRS( M, N, nrhs )) / 1e9;
lworkgpu = (M - N + nb)*(nrhs + nb) + nrhs*nb;
// query for workspace size
lhwork = -1;
lapackf77_cgeqrf(&M, &N, NULL, &M, NULL, tmp, &lhwork, &info);
lhwork2 = (magma_int_t) MAGMA_C_REAL( tmp[0] );
lhwork = -1;
lapackf77_cunmqr( MagmaLeftStr, MagmaConjTransStr,
&M, &nrhs, &min_mn, NULL, &lda, NULL,
NULL, &ldb, tmp, &lhwork, &info);
lhwork = (magma_int_t) MAGMA_C_REAL( tmp[0] );
lhwork = max( max( lhwork, lhwork2 ), lworkgpu );
TESTING_MALLOC_CPU( tau, magmaFloatComplex, min_mn );
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*N );
TESTING_MALLOC_CPU( h_A2, magmaFloatComplex, lda*N );
TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_X, magmaFloatComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_R, magmaFloatComplex, ldb*nrhs );
TESTING_MALLOC_CPU( h_work, magmaFloatComplex, lhwork );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( d_B, magmaFloatComplex, lddb*nrhs );
/* Initialize the matrices */
lapackf77_clarnv( &ione, ISEED, &size, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_A2, &lda );
// make random RHS
size = M*nrhs;
lapackf77_clarnv( &ione, ISEED, &size, h_B );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
// make consistent RHS
//size = N*nrhs;
//lapackf77_clarnv( &ione, ISEED, &size, h_X );
//blasf77_cgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
// &c_one, h_A, &lda,
// h_X, &ldb,
// &c_zero, h_B, &ldb );
//lapackf77_clacpy( MagmaUpperLowerStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_csetmatrix( M, N, h_A, lda, d_A, ldda );
magma_csetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
gpu_time = magma_wtime();
magma_cgels3_gpu( MagmaNoTrans, M, N, nrhs, d_A, ldda,
d_B, lddb, h_work, lworkgpu, &info);
gpu_time = magma_wtime() - gpu_time;
//.........这里部分代码省略.........
示例7: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing slacpy_batched
Code is very similar to testing_sgeadd_batched.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
float c_neg_one = MAGMA_S_NEG_ONE;
float *h_A, *h_B;
magmaFloat_ptr d_A, d_B;
float **hAarray, **hBarray, **dAarray, **dBarray;
magma_int_t M, N, mb, nb, size, lda, ldda, mstride, nstride, ntile;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts( MagmaOptsBatched );
opts.parse_opts( argc, argv );
mb = (opts.nb == 0 ? 32 : opts.nb);
nb = (opts.nb == 0 ? 64 : opts.nb);
mstride = 2*mb;
nstride = 3*nb;
printf("%% mb=%d, nb=%d, mstride=%d, nstride=%d\n", (int) mb, (int) nb, (int) mstride, (int) nstride );
printf("%% M N ntile CPU Gflop/s (ms) GPU Gflop/s (ms) check\n");
printf("%%================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
lda = M;
ldda = magma_roundup( M, opts.align ); // multiple of 32 by default
size = lda*N;
if ( N < nb || M < nb ) {
ntile = 0;
} else {
ntile = min( (M - nb)/mstride + 1,
(N - nb)/nstride + 1 );
}
gbytes = 2.*mb*nb*ntile / 1e9;
TESTING_MALLOC_CPU( h_A, float, lda *N );
TESTING_MALLOC_CPU( h_B, float, lda *N );
TESTING_MALLOC_DEV( d_A, float, ldda*N );
TESTING_MALLOC_DEV( d_B, float, ldda*N );
TESTING_MALLOC_CPU( hAarray, float*, ntile );
TESTING_MALLOC_CPU( hBarray, float*, ntile );
TESTING_MALLOC_DEV( dAarray, float*, ntile );
TESTING_MALLOC_DEV( dBarray, float*, ntile );
lapackf77_slarnv( &ione, ISEED, &size, h_A );
lapackf77_slarnv( &ione, ISEED, &size, h_B );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_ssetmatrix( M, N, h_A, lda, d_A, ldda );
magma_ssetmatrix( M, N, h_B, lda, d_B, ldda );
// setup pointers
for( magma_int_t tile = 0; tile < ntile; ++tile ) {
magma_int_t offset = tile*mstride + tile*nstride*ldda;
hAarray[tile] = &d_A[offset];
hBarray[tile] = &d_B[offset];
}
magma_setvector( ntile, sizeof(float*), hAarray, 1, dAarray, 1 );
magma_setvector( ntile, sizeof(float*), hBarray, 1, dBarray, 1 );
gpu_time = magma_sync_wtime( opts.queue );
magmablas_slacpy_batched( MagmaFull, mb, nb, dAarray, ldda, dBarray, ldda, ntile, opts.queue );
gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
for( magma_int_t tile = 0; tile < ntile; ++tile ) {
magma_int_t offset = tile*mstride + tile*nstride*lda;
lapackf77_slacpy( MagmaFullStr, &mb, &nb,
&h_A[offset], &lda,
&h_B[offset], &lda );
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_sgetmatrix( M, N, d_B, ldda, h_A, lda );
blasf77_saxpy(&size, &c_neg_one, h_A, &ione, h_B, &ione);
error = lapackf77_slange("f", &M, &N, h_B, &lda, work);
bool okay = (error == 0);
//.........这里部分代码省略.........
示例8: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing claset
Code is very similar to testing_clacpy.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_R;
magmaFloatComplex *d_A;
magmaFloatComplex offdiag = MAGMA_C_MAKE( 1.2000, 6.7000 );
magmaFloatComplex diag = MAGMA_C_MAKE( 3.1415, 2.7183 );
magma_int_t M, N, size, lda, ldb, ldda;
magma_int_t ione = 1;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };
printf("uplo M N CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("==================================================================\n");
for( int iuplo = 0; iuplo < 3; ++iuplo ) {
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
//M += 2; // space for insets
//N += 2;
lda = M;
ldb = lda;
ldda = ((M+31)/32)*32;
size = lda*N;
if ( uplo[iuplo] == MagmaLower || uplo[iuplo] == MagmaUpper ) {
// save triangle (with diagonal)
// TODO wrong for trapezoid
gbytes = sizeof(magmaFloatComplex) * 0.5*N*(N+1) / 1e9;
}
else {
// save entire matrix
gbytes = sizeof(magmaFloatComplex) * 1.*M*N / 1e9;
}
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, size );
TESTING_MALLOC_CPU( h_R, magmaFloatComplex, size );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j );
}
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_csetmatrix( M, N, h_A, lda, d_A, ldda );
gpu_time = magma_sync_wtime( 0 );
//magmablas_claset( uplo[iuplo], M-2, N-2, offdiag, diag, d_A+1+ldda, ldda ); // inset by 1 row & col
magmablas_claset( uplo[iuplo], M, N, offdiag, diag, d_A, ldda );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
//magma_int_t M2 = M-2; // inset by 1 row & col
//magma_int_t N2 = N-2;
//lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M2, &N2, &offdiag, &diag, h_A+1+lda, &lda );
lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M, &N, &offdiag, &diag, h_A, &lda );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_cgetmatrix( M, N, d_A, ldda, h_R, lda );
blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_clange("f", &M, &N, h_R, &lda, work);
printf("%4c %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %s\n",
lapacke_uplo_const( uplo[iuplo] ), (int) M, (int) N,
cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed") );
status += ! (error == 0.);
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( d_A );
fflush( stdout );
//.........这里部分代码省略.........
示例9: main
/* ////////////////////////////////////////////////////////////////////////////
-- testing sparse matrix vector product
*/
int main( int argc, char** argv )
{
TESTING_INIT();
magma_queue_t queue;
magma_queue_create( /*devices[ opts->device ],*/ &queue );
magma_c_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
hA_SELLP.blocksize = 8;
hA_SELLP.alignment = 8;
real_Double_t start, end, res;
magma_int_t *pntre;
magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0);
magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0);
magma_int_t i, j;
for( i = 1; i < argc; ++i ) {
if ( strcmp("--blocksize", argv[i]) == 0 ) {
hA_SELLP.blocksize = atoi( argv[++i] );
} else if ( strcmp("--alignment", argv[i]) == 0 ) {
hA_SELLP.alignment = atoi( argv[++i] );
} else
break;
}
printf( "\n# usage: ./run_cspmv"
" [ --blocksize %d --alignment %d (for SELLP) ]"
" matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );
while( i < argc ) {
if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test
i++;
magma_int_t laplace_size = atoi( argv[i] );
magma_cm_5stencil( laplace_size, &hA, queue );
} else { // file-matrix test
magma_c_csr_mtx( &hA, argv[i], queue );
}
printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
(int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );
real_Double_t FLOPS = 2.0*hA.nnz/1e9;
magma_c_vector hx, hy, dx, dy, hrefvec, hcheck;
// init CPU vectors
magma_c_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
magma_c_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );
// init DEV vectors
magma_c_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
#ifdef MAGMA_WITH_MKL
// calling MKL with CSR
pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
pntre[0] = 0;
for (j=0; j<hA.num_rows; j++ ) {
pntre[j] = hA.row[j+1];
}
MKL_INT num_rows = hA.num_rows;
MKL_INT num_cols = hA.num_cols;
MKL_INT nnz = hA.nnz;
MKL_INT *col;
TESTING_MALLOC_CPU( col, MKL_INT, nnz );
for( magma_int_t t=0; t < hA.nnz; ++t ) {
col[ t ] = hA.col[ t ];
}
MKL_INT *row;
TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
for( magma_int_t t=0; t < hA.num_rows; ++t ) {
row[ t ] = hA.col[ t ];
}
start = magma_wtime();
for (j=0; j<10; j++ ) {
mkl_ccsrmv( "N", &num_rows, &num_cols,
MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val),
col, row, pntre,
MKL_ADDR(hx.val),
MKL_ADDR(&c_zero), MKL_ADDR(hy.val) );
}
end = magma_wtime();
printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n",
(end-start)/10, FLOPS*10/(end-start) );
TESTING_FREE_CPU( row );
TESTING_FREE_CPU( col );
free(pntre);
#endif // MAGMA_WITH_MKL
// copy matrix to GPU
magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );
// SpMV on GPU (CSR) -- this is the reference!
start = magma_sync_wtime( queue );
for (j=0; j<10; j++)
//.........这里部分代码省略.........
示例10: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cnan_inf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
#define hA(i,j) (hA + (i) + (j)*lda)
magmaFloatComplex *hA, *dA;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t M, N, lda, ldda, size;
magma_int_t *ii, *jj;
magma_int_t i, j, cnt, tmp;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };
printf("uplo M N CPU nan + inf GPU nan + inf actual nan + inf \n");
printf("===============================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iuplo = 0; iuplo < 3; ++iuplo ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
lda = M;
ldda = ((M + 31)/32)*32;
size = lda*N;
/* Allocate memory for the matrix */
TESTING_MALLOC_CPU( hA, magmaFloatComplex, lda *N );
TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &size, hA );
// up to half of matrix is NAN, and
// up to half of matrix is INF.
magma_int_t cnt_nan = (magma_int_t)( (rand() / ((float)RAND_MAX)) * 0.5 * M*N );
magma_int_t cnt_inf = (magma_int_t)( (rand() / ((float)RAND_MAX)) * 0.5 * M*N );
magma_int_t total = cnt_nan + cnt_inf;
assert( cnt_nan >= 0 );
assert( cnt_inf >= 0 );
assert( total <= M*N );
// fill in indices
TESTING_MALLOC_CPU( ii, magma_int_t, size );
TESTING_MALLOC_CPU( jj, magma_int_t, size );
for( cnt=0; cnt < size; ++cnt ) {
ii[cnt] = cnt % M;
jj[cnt] = cnt / M;
}
// shuffle indices
for( cnt=0; cnt < total; ++cnt ) {
i = int( rand() / ((float)RAND_MAX) * size );
tmp=ii[cnt]; ii[cnt]=ii[i]; ii[i]=tmp;
tmp=jj[cnt]; jj[cnt]=jj[i]; jj[i]=tmp;
}
// fill in NAN and INF
// for uplo, count NAN and INF in triangular portion of A
int c_nan=0;
int c_inf=0;
for( cnt=0; cnt < cnt_nan; ++cnt ) {
i = ii[cnt];
j = jj[cnt];
*hA(i,j) = MAGMA_C_NAN;
if ( uplo[iuplo] == MagmaLower && i >= j ) { c_nan++; }
if ( uplo[iuplo] == MagmaUpper && i <= j ) { c_nan++; }
}
for( cnt=cnt_nan; cnt < cnt_nan + cnt_inf; ++cnt ) {
i = ii[cnt];
j = jj[cnt];
*hA(i,j) = MAGMA_C_INF;
if ( uplo[iuplo] == MagmaLower && i >= j ) { c_inf++; }
if ( uplo[iuplo] == MagmaUpper && i <= j ) { c_inf++; }
}
if ( uplo[iuplo] == MagmaLower || uplo[iuplo] == MagmaUpper ) {
cnt_nan = c_nan;
cnt_inf = c_inf;
total = cnt_nan + cnt_inf;
}
//printf( "nan %g + %gi\n", MAGMA_C_REAL( MAGMA_C_NAN ), MAGMA_C_REAL( MAGMA_C_NAN ) );
//printf( "inf %g + %gi\n", MAGMA_C_REAL( MAGMA_C_INF ), MAGMA_C_REAL( MAGMA_C_INF ) );
//magma_cprint( M, N, hA, lda );
magma_csetmatrix( M, N, hA, lda, dA, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_int_t c_cpu_nan=-1, c_cpu_inf=-1;
magma_int_t c_gpu_nan=-1, c_gpu_inf=-1;
magma_int_t c_cpu = magma_cnan_inf ( uplo[iuplo], M, N, hA, lda, &c_cpu_nan, &c_cpu_inf );
magma_int_t c_gpu = magma_cnan_inf_gpu( uplo[iuplo], M, N, dA, ldda, &c_gpu_nan, &c_gpu_inf );
//.........这里部分代码省略.........
示例11: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing ctrsm
*/
// Test driver for magmablas_ctrsm.
// For every (M, N) size from the parsed options, and for opts.niter repetitions,
// it builds a triangular matrix A and a random right-hand side B on the host,
// copies both to the device, times magmablas_ctrsm, and copies the result back
// into h_Bmagma. The remainder of the driver is elided in this excerpt.
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, magma_perf, magma_time=0, cublas_perf, cublas_time, cpu_perf=0, cpu_time=0;
float magma_error, cublas_error, work[1];
magma_int_t M, N, info;
// Order of the square triangular matrix A (M or N, depending on opts.side).
magma_int_t Ak;
magma_int_t sizeA, sizeB;
magma_int_t lda, ldb, ldda, lddb;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t *ipiv;
magmaFloatComplex *h_A, *h_B, *h_Bcublas, *h_Bmagma, *h_B1, *h_X1, *h_X2;
magmaFloatComplex *d_A, *d_B;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex c_one = MAGMA_C_ONE;
magmaFloatComplex alpha = MAGMA_C_MAKE( 0.29, -0.86 );
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
// Pass/fail threshold: the user tolerance scaled by lapackf77_slamch("E").
float tol = opts.tolerance * lapackf77_slamch("E");
printf("side = %s, uplo = %s, transA = %s, diag = %s \n",
lapack_side_const(opts.side), lapack_uplo_const(opts.uplo),
lapack_trans_const(opts.transA), lapack_diag_const(opts.diag) );
printf(" M N MAGMA Gflop/s (ms) CUBLAS Gflop/s (ms) CPU Gflop/s (ms) MAGMA error CUBLAS error\n");
printf("==================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
gflops = FLOPS_CTRSM(opts.side, M, N) / 1e9;
// A is M-by-M when applied from the left, N-by-N otherwise.
if ( opts.side == MagmaLeft ) {
lda = M;
Ak = M;
} else {
lda = N;
Ak = N;
}
ldb = M;
// Device leading dimensions are the host ones rounded up to a multiple of 32.
ldda = ((lda+31)/32)*32;
lddb = ((ldb+31)/32)*32;
sizeA = lda*Ak;
sizeB = ldb*N;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*Ak );
TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( h_B1, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( h_X1, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( h_X2, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( h_Bcublas, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( h_Bmagma, magmaFloatComplex, ldb*N );
TESTING_MALLOC_CPU( ipiv, magma_int_t, Ak );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*Ak );
TESTING_MALLOC_DEV( d_B, magmaFloatComplex, lddb*N );
/* Initialize the matrices */
/* Factor A into LU to get well-conditioned triangular matrix.
* Copy L to U, since L seems okay when used with non-unit diagonal
* (i.e., from U), while U fails when used with unit diagonal. */
lapackf77_clarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_cgetrf( &Ak, &Ak, h_A, &lda, ipiv, &info );
// Mirror the strictly lower triangle into the strictly upper one:
// entry (i,j) with i < j receives the value stored at (j,i).
// NOTE(review): h_A(i,j) is used as an indexing macro here; its definition
// is not part of this excerpt.
for( int j = 0; j < Ak; ++j ) {
for( int i = 0; i < j; ++i ) {
*h_A(i,j) = *h_A(j,i);
}
}
lapackf77_clarnv( &ione, ISEED, &sizeB, h_B );
// Keep an untouched copy of the right-hand side in h_B1.
memcpy(h_B1, h_B, sizeB*sizeof(magmaFloatComplex));
/* =====================================================================
Performs operation using MAGMABLAS
=================================================================== */
magma_csetmatrix( Ak, Ak, h_A, lda, d_A, ldda );
magma_csetmatrix( M, N, h_B, ldb, d_B, lddb );
// Only the magmablas_ctrsm call sits between the two timer reads.
magma_time = magma_sync_wtime( NULL );
magmablas_ctrsm( opts.side, opts.uplo, opts.transA, opts.diag,
M, N,
alpha, d_A, ldda,
d_B, lddb );
magma_time = magma_sync_wtime( NULL ) - magma_time;
magma_perf = gflops / magma_time;
// The solve overwrote d_B; fetch it into h_Bmagma.
magma_cgetmatrix( M, N, d_B, lddb, h_Bmagma, ldb );
/* =====================================================================
//.........这里部分代码省略.........
示例12: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zpotrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
magmaDoubleComplex *h_A, *h_R;
magmaDoubleComplex *d_A;
magma_int_t N, n2, lda, ldda, info;
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
double work[1], error;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
double tol = opts.tolerance * lapackf77_dlamch("E");
printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R_magma - R_lapack||_F / ||R_lapack||_F\n");
printf("========================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[i];
lda = N;
n2 = lda*N;
ldda = ((N+31)/32)*32;
gflops = FLOPS_ZPOTRF( N ) / 1e9;
TESTING_MALLOC( h_A, magmaDoubleComplex, n2 );
TESTING_HOSTALLOC( h_R, magmaDoubleComplex, n2 );
TESTING_DEVALLOC( d_A, magmaDoubleComplex, ldda*N );
/* Initialize the matrix */
lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
magma_zmake_hpd( N, h_A, lda );
lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
magma_zsetmatrix( N, N, h_A, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
magma_zpotrf_gpu( opts.uplo, N, d_A, ldda, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_zpotrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.lapack ) {
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_zpotrf( &opts.uplo, &N, h_A, &lda, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zpotrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
magma_zgetmatrix( N, N, d_A, ldda, h_R, lda );
error = lapackf77_zlange("f", &N, &N, h_A, &lda, work);
blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_zlange("f", &N, &N, h_R, &lda, work) / error;
printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e%s\n",
(int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "" : " failed") );
status |= ! (error < tol);
}
else {
printf("%5d --- ( --- ) %7.2f (%7.2f) --- \n",
(int) N, gpu_perf, gpu_time );
}
TESTING_FREE( h_A );
TESTING_HOSTFREE( h_R );
TESTING_DEVFREE( d_A );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}
TESTING_FINALIZE();
return status;
}
示例13: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing dgetri_gpu (matrix inversion; dgetrf is only used to prepare the LU factors)
*/
// Test driver for magma_dgetri_gpu.
// For every size N from the parsed options, and for opts.niter repetitions,
// it generates a random matrix, factors it with magma_dgetrf_gpu, then times
// magma_dgetri_gpu on the device factors and (when opts.lapack is set)
// lapackf77_dgetri on the same factors copied back to the host.
// The result check and the rest of the driver are elided in this excerpt.
int main( int argc, char** argv )
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double *h_A, *h_R, *work;
magmaDouble_ptr d_A, dwork;
double c_neg_one = MAGMA_D_NEG_ONE;
magma_int_t N, n2, lda, ldda, info, lwork, ldwork;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
// Receives the workspace size reported by the lapackf77_dgetri query below.
double tmp;
double error, rwork[1];
magma_int_t *ipiv;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
// need looser bound (3000*eps instead of 30*eps) for tests
// TODO: should compute ||I - A*A^{-1}|| / (n*||A||*||A^{-1}||)
opts.tolerance = max( 3000., opts.tolerance );
double tol = opts.tolerance * lapackf77_dlamch("E");
printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / (N*||A||_F)\n");
printf("=================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
n2 = lda*N;
// Device leading dimension: N rounded up to a multiple of 32.
ldda = ((N+31)/32)*32;
// Device workspace for magma_dgetri_gpu: N times the block size it reports.
ldwork = N * magma_get_dgetri_nb( N );
gflops = FLOPS_DGETRI( N ) / 1e9;
// query for workspace size
lwork = -1;
lapackf77_dgetri( &N, NULL, &lda, NULL, &tmp, &lwork, &info );
if (info != 0)
printf("lapackf77_dgetri returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// The query stored the required size in tmp; convert it to an integer count.
lwork = int( MAGMA_D_REAL( tmp ));
TESTING_MALLOC_CPU( ipiv, magma_int_t, N );
TESTING_MALLOC_CPU( work, double, lwork );
TESTING_MALLOC_CPU( h_A, double, n2 );
TESTING_MALLOC_PIN( h_R, double, n2 );
TESTING_MALLOC_DEV( d_A, double, ldda*N );
TESTING_MALLOC_DEV( dwork, double, ldwork );
/* Initialize the matrix */
lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
error = lapackf77_dlange( "f", &N, &N, h_A, &lda, rwork ); // norm(A)
/* Factor the matrix. Both MAGMA and LAPACK will use this factor. */
magma_dsetmatrix( N, N, h_A, lda, d_A, 0, ldda, opts.queue );
magma_dgetrf_gpu( N, N, d_A, 0, ldda, ipiv, opts.queue, &info );
// After this copy h_A holds the factored matrix, not the original A.
magma_dgetmatrix( N, N, d_A, 0, ldda, h_A, lda, opts.queue );
if ( info != 0 )
printf("magma_dgetrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// check for exact singularity
//h_A[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 );
//magma_dsetmatrix( N, N, h_A, lda, d_A, 0, ldda, opts.queue );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// Only the magma_dgetri_gpu call sits between the two timer reads.
gpu_time = magma_wtime();
magma_dgetri_gpu( N, d_A, 0, ldda, ipiv, dwork, 0, ldwork, opts.queues2, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_dgetri_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// Fetch the device result into h_R.
magma_dgetmatrix( N, N, d_A, 0, ldda, h_R, lda, opts.queue );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
// Operates in place on the factors stored in h_A, reusing ipiv from the GPU factorization.
lapackf77_dgetri( &N, h_A, &lda, ipiv, work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_dgetri returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
//.........这里部分代码省略.........
示例14: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing zungqr_gpu
*/
// Test driver for magma_zungqr_gpu.
// For every (m, n, k) from the parsed options, and for opts.niter repetitions,
// it QR-factors a random m-by-n matrix with magma_zgeqrf_gpu, times
// magma_zungqr_gpu on the device factors, and (when opts.lapack is set) runs
// lapackf77_zungqr on the same factors on the host, then compares the results.
// The reporting and cleanup part of the driver is elided in this excerpt.
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double Anorm, error, work[1];
magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
magmaDoubleComplex *hA, *hR, *tau, *h_work;
magmaDoubleComplex_ptr dA, dT;
magma_int_t m, n, k;
magma_int_t n2, lda, ldda, lwork, min_mn, nb, info;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
double tol = opts.tolerance * lapackf77_dlamch("E");
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
printf(" m n k CPU GFlop/s (sec) GPU GFlop/s (sec) ||R|| / ||A||\n");
printf("=========================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
m = opts.msize[itest];
n = opts.nsize[itest];
k = opts.ksize[itest];
// Only sizes with m >= n >= k are exercised; anything else is reported and skipped.
if ( m < n || n < k ) {
printf( "%5d %5d %5d skipping because m < n or n < k\n", (int) m, (int) n, (int) k );
continue;
}
lda = m;
// Device leading dimension: m rounded up to a multiple of 32.
ldda = ((m + 31)/32)*32;
n2 = lda*n;
min_mn = min(m, n);
nb = magma_get_zgeqrf_nb( m );
// Host workspace size later passed to lapackf77_zungqr.
lwork = (m + 2*n+nb)*nb;
gflops = FLOPS_ZUNGQR( m, n, k ) / 1e9;
TESTING_MALLOC_PIN( hA, magmaDoubleComplex, lda*n );
TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork );
TESTING_MALLOC_CPU( hR, magmaDoubleComplex, lda*n );
TESTING_MALLOC_CPU( tau, magmaDoubleComplex, min_mn );
TESTING_MALLOC_DEV( dA, magmaDoubleComplex, ldda*n );
TESTING_MALLOC_DEV( dT, magmaDoubleComplex, ( 2*min_mn + ((n + 31)/32)*32 )*nb );
// Random input matrix; hR receives a copy and Anorm its "f" norm.
lapackf77_zlarnv( &ione, ISEED, &n2, hA );
lapackf77_zlacpy( MagmaFullStr, &m, &n, hA, &lda, hR, &lda );
Anorm = lapackf77_zlange("f", &m, &n, hA, &lda, work );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// first, get QR factors in both hA and dA
// okay that magma_zgeqrf_gpu has special structure for R; R isn't used here.
magma_zsetmatrix( m, n, hA, lda, dA, ldda );
magma_zgeqrf_gpu( m, n, dA, ldda, tau, dT, &info );
if (info != 0)
printf("magma_zgeqrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// Host copy of the factors, used as input to lapackf77_zungqr below.
magma_zgetmatrix( m, n, dA, ldda, hA, lda );
// Only the magma_zungqr_gpu call sits between the two timer reads.
gpu_time = magma_wtime();
magma_zungqr_gpu( m, n, k, dA, ldda, tau, dT, nb, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_zungqr_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// Get dA back to the CPU to compare with the CPU result.
magma_zgetmatrix( m, n, dA, ldda, hR, lda );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
lapackf77_zungqr( &m, &n, &k, hA, &lda, tau, h_work, &lwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_zungqr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
// compute relative error |R|/|A| := |Q_magma - Q_lapack|/|A|
// hR := hR - hA, i.e. the MAGMA result minus the LAPACK result.
blasf77_zaxpy( &n2, &c_neg_one, hA, &ione, hR, &ione );
error = lapackf77_zlange("f", &m, &n, hR, &lda, work) / Anorm;
bool okay = (error < tol);
// status counts the failing cases.
status += ! okay;
printf("%5d %5d %5d %7.1f (%7.2f) %7.1f (%7.2f) %8.2e %s\n",
//.........这里部分代码省略.........
示例15: main
int main(int argc, char **argv)
{
TESTING_INIT();
real_Double_t gflops, magma_perf, magma_time, cublas_perf, cublas_time, cpu_perf, cpu_time;
double magma_error, cublas_error, work[1];
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t M, N, Xm, Ym, lda, sizeA, sizeX, sizeY;
magma_int_t incx = 1;
magma_int_t incy = 1;
double c_neg_one = MAGMA_D_NEG_ONE;
double alpha = MAGMA_D_MAKE( 1.5, -2.3 );
double beta = MAGMA_D_MAKE( -0.6, 0.8 );
double *A, *X, *Y, *Ycublas, *Ymagma;
double *dA, *dX, *dY;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
double tol = opts.tolerance * lapackf77_dlamch("E");
printf("trans = %s\n", lapack_trans_const(opts.transA) );
printf(" M N MAGMA Gflop/s (ms) CUBLAS Gflop/s (ms) CPU Gflop/s (ms) MAGMA error CUBLAS error\n");
printf("===================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
lda = ((M+31)/32)*32;
gflops = FLOPS_DGEMV( M, N ) / 1e9;
if ( opts.transA == MagmaNoTrans ) {
Xm = N;
Ym = M;
} else {
Xm = M;
Ym = N;
}
sizeA = lda*N;
sizeX = incx*Xm;
sizeY = incy*Ym;
TESTING_MALLOC_CPU( A, double, sizeA );
TESTING_MALLOC_CPU( X, double, sizeX );
TESTING_MALLOC_CPU( Y, double, sizeY );
TESTING_MALLOC_CPU( Ycublas, double, sizeY );
TESTING_MALLOC_CPU( Ymagma, double, sizeY );
TESTING_MALLOC_DEV( dA, double, sizeA );
TESTING_MALLOC_DEV( dX, double, sizeX );
TESTING_MALLOC_DEV( dY, double, sizeY );
/* Initialize the matrix */
lapackf77_dlarnv( &ione, ISEED, &sizeA, A );
lapackf77_dlarnv( &ione, ISEED, &sizeX, X );
lapackf77_dlarnv( &ione, ISEED, &sizeY, Y );
/* =====================================================================
Performs operation using CUBLAS
=================================================================== */
magma_dsetmatrix( M, N, A, lda, dA, lda );
magma_dsetvector( Xm, X, incx, dX, incx );
magma_dsetvector( Ym, Y, incy, dY, incy );
cublas_time = magma_sync_wtime( 0 );
cublasDgemv( handle, cublas_trans_const(opts.transA),
M, N, &alpha, dA, lda, dX, incx, &beta, dY, incy );
cublas_time = magma_sync_wtime( 0 ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_dgetvector( Ym, dY, incy, Ycublas, incy );
/* =====================================================================
Performs operation using MAGMABLAS
=================================================================== */
magma_dsetvector( Ym, Y, incy, dY, incy );
magma_time = magma_sync_wtime( 0 );
magmablas_dgemv( opts.transA, M, N, alpha, dA, lda, dX, incx, beta, dY, incy );
magma_time = magma_sync_wtime( 0 ) - magma_time;
magma_perf = gflops / magma_time;
magma_dgetvector( Ym, dY, incx, Ymagma, incx );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
cpu_time = magma_wtime();
blasf77_dgemv( lapack_trans_const(opts.transA), &M, &N,
&alpha, A, &lda,
X, &incx,
&beta, Y, &incy );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
/* =====================================================================
Check the result
//.........这里部分代码省略.........