本文整理汇总了C++中MAGMA_C_MAKE函数的典型用法代码示例。如果您正苦于以下问题:C++ MAGMA_C_MAKE函数的具体用法?C++ MAGMA_C_MAKE怎么用?C++ MAGMA_C_MAKE使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MAGMA_C_MAKE函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: magma_ccg
extern "C" magma_int_t
magma_ccg(
magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x,
magma_c_solver_par *solver_par,
magma_queue_t queue )
{
magma_int_t info = 0;
// set queue for old dense routines
magma_queue_t orig_queue=NULL;
magmablasGetKernelStream( &orig_queue );
// prepare solver feedback
solver_par->solver = Magma_CG;
solver_par->numiter = 0;
solver_par->info = MAGMA_SUCCESS;
// local variables
magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE;
magma_int_t dofs = A.num_rows * b.num_cols;
// GPU workspace
magma_c_matrix r={Magma_CSR}, p={Magma_CSR}, q={Magma_CSR};
CHECK( magma_cvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_cvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_cvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
// solver variables
magmaFloatComplex alpha, beta;
float nom, nom0, r0, betanom, betanomsq, den;
// solver setup
CHECK( magma_cresidualvec( A, b, *x, &r, &nom0, queue));
magma_ccopy( dofs, r.dval, 1, p.dval, 1 ); // p = r
betanom = nom0;
nom = nom0 * nom0; // nom = r' * r
CHECK( magma_c_spmv( c_one, A, p, c_zero, q, queue )); // q = A p
den = MAGMA_C_REAL( magma_cdotc(dofs, p.dval, 1, q.dval, 1) );// den = p dot q
solver_par->init_res = nom0;
if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
r0 = ATOLERANCE;
if ( nom < r0 ) {
solver_par->final_res = solver_par->init_res;
solver_par->iter_res = solver_par->init_res;
goto cleanup;
}
// check positive definite
if (den <= 0.0) {
printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
magmablasSetKernelStream( orig_queue );
info = MAGMA_NONSPD;
goto cleanup;
}
//Chronometry
real_Double_t tempo1, tempo2;
tempo1 = magma_sync_wtime( queue );
if ( solver_par->verbose > 0 ) {
solver_par->res_vec[0] = (real_Double_t)nom0;
solver_par->timing[0] = 0.0;
}
solver_par->numiter = 0;
// start iteration
do
{
solver_par->numiter++;
alpha = MAGMA_C_MAKE(nom/den, 0.);
magma_caxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p
magma_caxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q
betanom = magma_scnrm2(dofs, r.dval, 1); // betanom = || r ||
betanomsq = betanom * betanom; // betanoms = r' * r
if ( solver_par->verbose > 0 ) {
tempo2 = magma_sync_wtime( queue );
if ( (solver_par->numiter)%solver_par->verbose==0 ) {
solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
= (real_Double_t) betanom;
solver_par->timing[(solver_par->numiter)/solver_par->verbose]
= (real_Double_t) tempo2-tempo1;
}
}
if ( betanom < r0 ) {
break;
}
beta = MAGMA_C_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom
magma_cscal(dofs, beta, p.dval, 1); // p = beta*p
magma_caxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r
CHECK( magma_c_spmv( c_one, A, p, c_zero, q, queue )); // q = A p
den = MAGMA_C_REAL(magma_cdotc(dofs, p.dval, 1, q.dval, 1));
// den = p dot q
nom = betanomsq;
}
while ( solver_par->numiter+1 <= solver_par->maxiter );
tempo2 = magma_sync_wtime( queue );
//.........这里部分代码省略.........
示例2: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing claset
Code is very similar to testing_clacpy.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_R;
magmaFloatComplex *d_A;
magmaFloatComplex offdiag = MAGMA_C_MAKE( 1.2000, 6.7000 );
magmaFloatComplex diag = MAGMA_C_MAKE( 3.1415, 2.7183 );
magma_int_t M, N, size, lda, ldb, ldda;
magma_int_t ione = 1;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };
printf("uplo M N CPU GByte/s (ms) GPU GByte/s (ms) check\n");
printf("==================================================================\n");
for( int iuplo = 0; iuplo < 3; ++iuplo ) {
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
//M += 2; // space for insets
//N += 2;
lda = M;
ldb = lda;
ldda = ((M+31)/32)*32;
size = lda*N;
if ( uplo[iuplo] == MagmaLower || uplo[iuplo] == MagmaUpper ) {
// save triangle (with diagonal)
// TODO wrong for trapezoid
gbytes = sizeof(magmaFloatComplex) * 0.5*N*(N+1) / 1e9;
}
else {
// save entire matrix
gbytes = sizeof(magmaFloatComplex) * 1.*M*N / 1e9;
}
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, size );
TESTING_MALLOC_CPU( h_R, magmaFloatComplex, size );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
for( int j = 0; j < N; ++j ) {
for( int i = 0; i < M; ++i ) {
h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j );
}
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_csetmatrix( M, N, h_A, lda, d_A, ldda );
gpu_time = magma_sync_wtime( 0 );
//magmablas_claset( uplo[iuplo], M-2, N-2, offdiag, diag, d_A+1+ldda, ldda ); // inset by 1 row & col
magmablas_claset( uplo[iuplo], M, N, offdiag, diag, d_A, ldda );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gbytes / gpu_time;
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
//magma_int_t M2 = M-2; // inset by 1 row & col
//magma_int_t N2 = N-2;
//lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M2, &N2, &offdiag, &diag, h_A+1+lda, &lda );
lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M, &N, &offdiag, &diag, h_A, &lda );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gbytes / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_cgetmatrix( M, N, d_A, ldda, h_R, lda );
blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_clange("f", &M, &N, h_R, &lda, work);
printf("%4c %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %s\n",
lapacke_uplo_const( uplo[iuplo] ), (int) M, (int) N,
cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
(error == 0. ? "ok" : "failed") );
status += ! (error == 0.);
TESTING_FREE_CPU( h_A );
TESTING_FREE_CPU( h_R );
TESTING_FREE_DEV( d_A );
fflush( stdout );
//.........这里部分代码省略.........
示例3: magma_cm_5stencil
magma_int_t
magma_cm_5stencil(
magma_int_t n,
magma_c_matrix *A,
magma_queue_t queue )
{
magma_int_t info = 0;
magma_int_t i,j,k;
magma_c_matrix hA={Magma_CSR};
// generate matrix of desired structure and size (2d 5-point stencil)
magma_int_t nn = n*n;
magma_int_t offdiags = 2;
magma_index_t *diag_offset=NULL;
magmaFloatComplex *diag_vals=NULL;
CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));
diag_offset[0] = 0;
diag_offset[1] = 1;
diag_offset[2] = n;
#define COMPLEX
#ifdef COMPLEX
// complex case
diag_vals[0] = MAGMA_C_MAKE( 4.0, 4.0 );
diag_vals[1] = MAGMA_C_MAKE( -1.0, -1.0 );
diag_vals[2] = MAGMA_C_MAKE( -1.0, -1.0 );
#else
// real case
diag_vals[0] = MAGMA_C_MAKE( 4.0, 0.0 );
diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
#endif
CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));
// now set some entries to zero (boundary...)
for( i=0; i<n; i++ ) {
for( j=0; j<n; j++ ) {
magma_index_t row = i*n+j;
for( k=hA.row[row]; k<hA.row[row+1]; k++) {
if ((hA.col[k] == row-1 ) && (row+1)%n == 1 )
hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
if ((hA.col[k] == row+1 ) && (row)%n ==n-1 )
hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
}
}
}
CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));
magma_cmcsrcompressor( A, queue );
A->true_nnz = A->nnz;
cleanup:
magma_free_cpu( diag_vals );
magma_free_cpu( diag_offset );
magma_cmfree( &hA, queue );
return info;
}
示例4: main
/* ////////////////////////////////////////////////////////////////////////////
-- testing sparse matrix vector product
*/
int main( int argc, char** argv )
{
TESTING_INIT();
magma_queue_t queue;
magma_queue_create( /*devices[ opts->device ],*/ &queue );
magma_c_sparse_matrix hA, hA_SELLP, hA_ELL, dA, dA_SELLP, dA_ELL;
hA_SELLP.blocksize = 8;
hA_SELLP.alignment = 8;
real_Double_t start, end, res;
magma_int_t *pntre;
magmaFloatComplex c_one = MAGMA_C_MAKE(1.0, 0.0);
magmaFloatComplex c_zero = MAGMA_C_MAKE(0.0, 0.0);
magma_int_t i, j;
for( i = 1; i < argc; ++i ) {
if ( strcmp("--blocksize", argv[i]) == 0 ) {
hA_SELLP.blocksize = atoi( argv[++i] );
} else if ( strcmp("--alignment", argv[i]) == 0 ) {
hA_SELLP.alignment = atoi( argv[++i] );
} else
break;
}
printf( "\n# usage: ./run_cspmv"
" [ --blocksize %d --alignment %d (for SELLP) ]"
" matrices \n\n", (int) hA_SELLP.blocksize, (int) hA_SELLP.alignment );
while( i < argc ) {
if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test
i++;
magma_int_t laplace_size = atoi( argv[i] );
magma_cm_5stencil( laplace_size, &hA, queue );
} else { // file-matrix test
magma_c_csr_mtx( &hA, argv[i], queue );
}
printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
(int) hA.num_rows,(int) hA.num_cols,(int) hA.nnz );
real_Double_t FLOPS = 2.0*hA.nnz/1e9;
magma_c_vector hx, hy, dx, dy, hrefvec, hcheck;
// init CPU vectors
magma_c_vinit( &hx, Magma_CPU, hA.num_rows, c_zero, queue );
magma_c_vinit( &hy, Magma_CPU, hA.num_rows, c_zero, queue );
// init DEV vectors
magma_c_vinit( &dx, Magma_DEV, hA.num_rows, c_one, queue );
magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
#ifdef MAGMA_WITH_MKL
// calling MKL with CSR
pntre = (magma_int_t*)malloc( (hA.num_rows+1)*sizeof(magma_int_t) );
pntre[0] = 0;
for (j=0; j<hA.num_rows; j++ ) {
pntre[j] = hA.row[j+1];
}
MKL_INT num_rows = hA.num_rows;
MKL_INT num_cols = hA.num_cols;
MKL_INT nnz = hA.nnz;
MKL_INT *col;
TESTING_MALLOC_CPU( col, MKL_INT, nnz );
for( magma_int_t t=0; t < hA.nnz; ++t ) {
col[ t ] = hA.col[ t ];
}
MKL_INT *row;
TESTING_MALLOC_CPU( row, MKL_INT, num_rows );
for( magma_int_t t=0; t < hA.num_rows; ++t ) {
row[ t ] = hA.col[ t ];
}
start = magma_wtime();
for (j=0; j<10; j++ ) {
mkl_ccsrmv( "N", &num_rows, &num_cols,
MKL_ADDR(&c_one), "GFNC", MKL_ADDR(hA.val),
col, row, pntre,
MKL_ADDR(hx.val),
MKL_ADDR(&c_zero), MKL_ADDR(hy.val) );
}
end = magma_wtime();
printf( "\n > MKL : %.2e seconds %.2e GFLOP/s (CSR).\n",
(end-start)/10, FLOPS*10/(end-start) );
TESTING_FREE_CPU( row );
TESTING_FREE_CPU( col );
free(pntre);
#endif // MAGMA_WITH_MKL
// copy matrix to GPU
magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );
// SpMV on GPU (CSR) -- this is the reference!
start = magma_sync_wtime( queue );
for (j=0; j<10; j++)
//.........这里部分代码省略.........
示例5: main
int main(int argc, char **argv)
{
TESTING_INIT();
const magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
const magma_int_t ione = 1;
real_Double_t gflops, magma_perf, magma_time, cpu_perf, cpu_time;
float magma_error, work[1];
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t N, lda, ldda, sizeA, sizeX, sizeY, blocks, ldwork;
magma_int_t incx = 1;
magma_int_t incy = 1;
magma_int_t nb = 64;
magmaFloatComplex alpha = MAGMA_C_MAKE( 1.5, -2.3 );
magmaFloatComplex beta = MAGMA_C_MAKE( -0.6, 0.8 );
magmaFloatComplex *A, *X, *Y, *Ymagma;
magmaFloatComplex_ptr dA, dX, dY, dwork;
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
printf("uplo = %s\n", lapack_uplo_const(opts.uplo) );
printf(" N MAGMA Gflop/s (ms) CPU Gflop/s (ms) MAGMA error\n");
printf("=========================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.nsize[itest];
lda = N;
ldda = ((N + 31)/32)*32;
sizeA = N*lda;
sizeX = N*incx;
sizeY = N*incy;
gflops = FLOPS_CSYMV( N ) / 1e9;
TESTING_MALLOC_CPU( A, magmaFloatComplex, sizeA );
TESTING_MALLOC_CPU( X, magmaFloatComplex, sizeX );
TESTING_MALLOC_CPU( Y, magmaFloatComplex, sizeY );
TESTING_MALLOC_CPU( Ymagma, magmaFloatComplex, sizeY );
TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( dX, magmaFloatComplex, sizeX );
TESTING_MALLOC_DEV( dY, magmaFloatComplex, sizeY );
blocks = (N + nb - 1) / nb;
ldwork = ldda*blocks;
TESTING_MALLOC_DEV( dwork, magmaFloatComplex, ldwork );
magmablas_claset( MagmaFull, ldwork, 1, MAGMA_C_NAN, MAGMA_C_NAN, dwork, ldwork );
magmablas_claset( MagmaFull, ldda, N, MAGMA_C_NAN, MAGMA_C_NAN, dA, ldda );
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &sizeA, A );
magma_cmake_hermitian( N, A, lda );
// should not use data from the opposite triangle -- fill with NAN to check
magma_int_t N1 = N-1;
if ( opts.uplo == MagmaUpper ) {
lapackf77_claset( "Lower", &N1, &N1, &MAGMA_C_NAN, &MAGMA_C_NAN, &A[1], &lda );
}
else {
lapackf77_claset( "Upper", &N1, &N1, &MAGMA_C_NAN, &MAGMA_C_NAN, &A[lda], &lda );
}
lapackf77_clarnv( &ione, ISEED, &sizeX, X );
lapackf77_clarnv( &ione, ISEED, &sizeY, Y );
/* Note: CUBLAS does not implement csymv */
/* =====================================================================
Performs operation using MAGMABLAS
=================================================================== */
magma_csetmatrix( N, N, A, lda, dA, ldda );
magma_csetvector( N, X, incx, dX, incx );
magma_csetvector( N, Y, incy, dY, incy );
magma_time = magma_sync_wtime( 0 );
if ( opts.version == 1 ) {
magmablas_csymv_work( opts.uplo, N, alpha, dA, ldda, dX, incx, beta, dY, incy, dwork, ldwork, opts.queue );
}
else {
// non-work interface (has added overhead)
magmablas_csymv( opts.uplo, N, alpha, dA, ldda, dX, incx, beta, dY, incy );
}
magma_time = magma_sync_wtime( 0 ) - magma_time;
magma_perf = gflops / magma_time;
magma_cgetvector( N, dY, incy, Ymagma, incy );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
cpu_time = magma_wtime();
lapackf77_csymv( lapack_uplo_const(opts.uplo), &N, &alpha, A, &lda, X, &incx, &beta, Y, &incy );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
//.........这里部分代码省略.........
示例6: main
int main( int argc, char** argv)
{
#define hA(i,j) (hA + (i) + (j)*lda)
TESTING_CUDA_INIT();
cuFloatComplex c_zero = MAGMA_C_ZERO;
cuFloatComplex c_one = MAGMA_C_ONE;
cuFloatComplex *hA, *hR, *dA;
//real_Double_t gpu_time, gpu_perf;
//int ione = 1;
//int ISEED[4] = {0, 0, 0, 1};
int nsize[] = { 32, 64, 96, 256, 100, 200, 512 };
int ntest = sizeof(nsize) / sizeof(int);
int n = nsize[ntest-1];
int lda = ((n + 31)/32)*32;
int ntile, nb;
TESTING_MALLOC ( hA, cuFloatComplex, lda*n );
TESTING_MALLOC ( hR, cuFloatComplex, lda*n );
TESTING_DEVALLOC ( dA, cuFloatComplex, lda*n );
for( int t = 0; t < ntest; ++t ) {
n = nsize[t];
lda = ((n + 31)/32)*32;
// initialize matrices; entries are (i.j) for A
float nf = 100.;
for( int j = 0; j < n; ++j ) {
// upper
for( int i = 0; i < j; ++i ) {
*hA(i,j) = MAGMA_C_MAKE( (i + j/nf)/nf, 0. );
}
// lower
for( int i = j; i < n; ++i ) {
*hA(i,j) = MAGMA_C_MAKE( i + j/nf, 0. );
}
}
printf( "A%d = ", n );
magma_cprint( n, n, hA, lda );
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize( MagmaLower, n, dA, lda );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d = ", n );
magma_cprint( n, n, hR, lda );
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize( MagmaUpper, n, dA, lda );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "U%d = ", n );
magma_cprint( n, n, hR, lda );
// -----
//lapackf77_claset( "u", &n, &n, &c_zero, &c_one, hA, &lda );
nb = 64;
ntile = n / nb;
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d_%d = ", n, nb );
magma_cprint( n, n, hR, lda );
nb = 32;
ntile = n / nb;
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d_%d = ", n, nb );
magma_cprint( n, n, hR, lda );
ntile = (n - nb < 0 ? 0 : (n - nb) / (2*nb) + 1);
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, 2*nb, nb );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d_%d_2m = ", n, nb );
magma_cprint( n, n, hR, lda );
nb = 25;
ntile = n / nb;
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d_%d = ", n, nb );
magma_cprint( n, n, hR, lda );
nb = 25;
ntile = (n - nb < 0 ? 0 : (n - nb) / (3*nb) + 1);
magma_csetmatrix( n, n, hA, lda, dA, lda );
magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, 3*nb );
magma_cgetmatrix( n, n, dA, lda, hR, lda );
printf( "L%d_%d_3n = ", n, nb );
magma_cprint( n, n, hR, lda );
nb = 100;
ntile = n / nb;
//.........这里部分代码省略.........
示例7: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgeadd_batched
Code is very similar to testing_clacpy_batched.cpp
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_B;
magmaFloatComplex *d_A, *d_B;
magmaFloatComplex **hAarray, **hBarray, **dAarray, **dBarray;
magmaFloatComplex alpha = MAGMA_C_MAKE( 3.1415, 2.718 );
magma_int_t M, N, mb, nb, size, lda, ldda, mstride, nstride, ntile;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
float tol = opts.tolerance * lapackf77_slamch("E");
mb = (opts.nb == 0 ? 32 : opts.nb);
nb = (opts.nb == 0 ? 64 : opts.nb);
mstride = 2*mb;
nstride = 3*nb;
printf("mb=%d, nb=%d, mstride=%d, nstride=%d\n", (int) mb, (int) nb, (int) mstride, (int) nstride );
printf(" M N ntile CPU GFlop/s (ms) GPU GFlop/s (ms) error \n");
printf("====================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
lda = M;
ldda = ((M+31)/32)*32;
size = lda*N;
if ( N < nb || M < nb ) {
ntile = 0;
} else {
ntile = min( (M - nb)/mstride + 1,
(N - nb)/nstride + 1 );
}
gflops = 2.*mb*nb*ntile / 1e9;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda *N );
TESTING_MALLOC_CPU( h_B, magmaFloatComplex, lda *N );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( d_B, magmaFloatComplex, ldda*N );
TESTING_MALLOC_CPU( hAarray, magmaFloatComplex*, ntile );
TESTING_MALLOC_CPU( hBarray, magmaFloatComplex*, ntile );
TESTING_MALLOC_DEV( dAarray, magmaFloatComplex*, ntile );
TESTING_MALLOC_DEV( dBarray, magmaFloatComplex*, ntile );
lapackf77_clarnv( &ione, ISEED, &size, h_A );
lapackf77_clarnv( &ione, ISEED, &size, h_B );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_csetmatrix( M, N, h_A, lda, d_A, ldda );
magma_csetmatrix( M, N, h_B, lda, d_B, ldda );
// setup pointers
for( int tile = 0; tile < ntile; ++tile ) {
int offset = tile*mstride + tile*nstride*ldda;
hAarray[tile] = &d_A[offset];
hBarray[tile] = &d_B[offset];
}
magma_setvector( ntile, sizeof(magmaFloatComplex*), hAarray, 1, dAarray, 1 );
magma_setvector( ntile, sizeof(magmaFloatComplex*), hBarray, 1, dBarray, 1 );
gpu_time = magma_sync_wtime( 0 );
magmablas_cgeadd_batched( mb, nb, alpha, dAarray, ldda, dBarray, ldda, ntile );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
for( int tile = 0; tile < ntile; ++tile ) {
int offset = tile*mstride + tile*nstride*lda;
for( int j = 0; j < nb; ++j ) {
blasf77_caxpy( &mb, &alpha,
&h_A[offset + j*lda], &ione,
&h_B[offset + j*lda], &ione );
}
}
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
/* =====================================================================
Check the result
=================================================================== */
magma_cgetmatrix( M, N, d_B, ldda, h_A, lda );
//.........这里部分代码省略.........
示例8: magma_cheevx
//.........这里部分代码省略.........
lopt = n + (magma_int_t)MAGMA_C_REAL(work[indwrk]);
/* If all eigenvalues are desired and ABSTOL is less than or equal to
zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for
some eigenvalue, then try SSTEBZ. */
if ((alleig || (indeig && il == 1 && iu == n)) && abstol <= 0.) {
blasf77_scopy(&n, &rwork[indd], &ione, &w[1], &ione);
indee = indrwk + 2*n;
if (! wantz) {
i__1 = n - 1;
blasf77_scopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
lapackf77_ssterf(&n, &w[1], &rwork[indee], info);
}
else {
lapackf77_clacpy("A", &n, &n, a, &lda, z, &ldz);
lapackf77_cungtr(uplo_, &n, z, &ldz, &work[indtau], &work[indwrk], &llwork, &iinfo);
i__1 = n - 1;
blasf77_scopy(&i__1, &rwork[inde], &ione, &rwork[indee], &ione);
lapackf77_csteqr(jobz_, &n, &w[1], &rwork[indee], z, &ldz, &rwork[indrwk], info);
if (*info == 0) {
for (i = 1; i <= n; ++i) {
ifail[i] = 0;
}
}
}
if (*info == 0) {
*m = n;
}
}
/* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */
if (*m == 0) {
*info = 0;
if (wantz) {
*(unsigned char *)order = 'B';
} else {
*(unsigned char *)order = 'E';
}
indibl = 1;
indisp = indibl + n;
indiwk = indisp + n;
lapackf77_sstebz(range_, order, &n, &vl, &vu, &il, &iu, &abstol, &rwork[indd], &rwork[inde], m,
&nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[indrwk], &iwork[indiwk], info);
if (wantz) {
lapackf77_cstein(&n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], &iwork[indisp],
z, &ldz, &rwork[indrwk], &iwork[indiwk], &ifail[1], info);
/* Apply unitary matrix used in reduction to tridiagonal
form to eigenvectors returned by CSTEIN. */
magma_cunmtr(MagmaLeft, uplo, MagmaNoTrans, n, *m, a, lda, &work[indtau],
z, ldz, &work[indwrk], llwork, &iinfo);
}
}
/* If matrix was scaled, then rescale eigenvalues appropriately. */
if (iscale == 1) {
if (*info == 0) {
imax = *m;
} else {
imax = *info - 1;
}
d__1 = 1. / sigma;
blasf77_sscal(&imax, &d__1, &w[1], &ione);
}
/* If eigenvalues are not in order, then sort them, along with
eigenvectors. */
if (wantz) {
for (j = 1; j <= *m-1; ++j) {
i = 0;
tmp1 = w[j];
for (jj = j + 1; jj <= *m; ++jj) {
if (w[jj] < tmp1) {
i = jj;
tmp1 = w[jj];
}
}
if (i != 0) {
itmp1 = iwork[indibl + i - 1];
w[i] = w[j];
iwork[indibl + i - 1] = iwork[indibl + j - 1];
w[j] = tmp1;
iwork[indibl + j - 1] = itmp1;
blasf77_cswap(&n, z + (i-1)*ldz, &ione, z + (j-1)*ldz, &ione);
if (*info != 0) {
itmp1 = ifail[i];
ifail[i] = ifail[j];
ifail[j] = itmp1;
}
}
}
}
/* Set WORK(1) to optimal complex workspace size. */
work[1] = MAGMA_C_MAKE((float) lopt, 0.);
return *info;
} /* magma_cheevx */
示例9: magma_clatrsd
//.........这里部分代码省略.........
}
if ( upper ) {
if ( j > 0 ) {
/* Compute the update */
/* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
len = j;
ztmp = -tscal * x[j];
blasf77_caxpy( &len, &ztmp, A(0,j), &ione, &x[0], &ione );
i = blasf77_icamax( &len, &x[0], &ione ) - 1;
xmax = MAGMA_C_ABS1( x[i] );
}
}
else {
if ( j < n-1 ) {
/* Compute the update */
/* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
len = n - (j+1);
ztmp = -tscal * x[j];
blasf77_caxpy( &len, &ztmp, A(j+1,j), &ione, &x[j + 1], &ione );
i = j + blasf77_icamax( &len, &x[j + 1], &ione );
xmax = MAGMA_C_ABS1( x[i] );
}
}
}
}
else if ( trans == MagmaTrans ) {
/* ---------------------------------------- */
/* Solve A**T * x = b */
for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) {
/* Compute x(j) = b(j) - sum A(k,j)*x(k). */
/* k<>j */
xj = MAGMA_C_ABS1( x[j] );
uscal = MAGMA_C_MAKE( tscal, 0. );
rec = 1. / max( xmax, 1. );
if ( cnorm[j] > (bignum - xj) * rec ) {
/* If x(j) could overflow, scale x by 1/(2*XMAX). */
rec *= 0.5;
if ( nounit ) {
tjjs = (*A(j,j) - lambda) * tscal;
}
else {
tjjs = (c_one - lambda) * tscal;
}
tjj = MAGMA_C_ABS1( tjjs );
if ( tjj > 1. ) {
/* Divide by A(j,j) when scaling x if A(j,j) > 1. */
rec = min( 1., rec * tjj );
uscal = uscal / tjjs;
}
if ( rec < 1. ) {
blasf77_csscal( &n, &rec, &x[0], &ione );
*scale *= rec;
xmax *= rec;
}
}
csumj = c_zero;
if ( uscal == c_one ) {
/* If the scaling needed for A in the dot product is 1, */
/* call CDOTU to perform the dot product. */
if ( upper ) {
csumj = magma_cblas_cdotu( j, A(0,j), ione, &x[0], ione );
}
else if ( j < n-1 ) {
csumj = magma_cblas_cdotu( n-(j+1), A(j+1,j), ione, &x[j+1], ione );
示例10: magma_cbicgstab
extern "C" magma_int_t
magma_cbicgstab(
magma_c_sparse_matrix A, magma_c_vector b, magma_c_vector *x,
magma_c_solver_par *solver_par,
magma_queue_t queue )
{
// set queue for old dense routines
magma_queue_t orig_queue;
magmablasGetKernelStream( &orig_queue );
// prepare solver feedback
solver_par->solver = Magma_BICGSTAB;
solver_par->numiter = 0;
solver_par->info = MAGMA_SUCCESS;
// some useful variables
magmaFloatComplex c_zero = MAGMA_C_ZERO, c_one = MAGMA_C_ONE,
c_mone = MAGMA_C_NEG_ONE;
magma_int_t dofs = A.num_rows;
// workspace
magma_c_vector r,rr,p,v,s,t;
magma_c_vinit( &r, Magma_DEV, dofs, c_zero, queue );
magma_c_vinit( &rr, Magma_DEV, dofs, c_zero, queue );
magma_c_vinit( &p, Magma_DEV, dofs, c_zero, queue );
magma_c_vinit( &v, Magma_DEV, dofs, c_zero, queue );
magma_c_vinit( &s, Magma_DEV, dofs, c_zero, queue );
magma_c_vinit( &t, Magma_DEV, dofs, c_zero, queue );
// solver variables
magmaFloatComplex alpha, beta, omega, rho_old, rho_new;
float nom, betanom, nom0, r0, den, res;
// solver setup
magma_cscal( dofs, c_zero, x->dval, 1) ; // x = 0
magma_ccopy( dofs, b.dval, 1, r.dval, 1 ); // r = b
magma_ccopy( dofs, b.dval, 1, rr.dval, 1 ); // rr = b
nom0 = betanom = magma_scnrm2( dofs, r.dval, 1 ); // nom = || r ||
nom = nom0*nom0;
rho_old = omega = alpha = MAGMA_C_MAKE( 1.0, 0. );
solver_par->init_res = nom0;
magma_c_spmv( c_one, A, r, c_zero, v, queue ); // z = A r
den = MAGMA_C_REAL( magma_cdotc(dofs, v.dval, 1, r.dval, 1) ); // den = z' * r
if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
r0 = ATOLERANCE;
if ( nom < r0 ) {
magmablasSetKernelStream( orig_queue );
return MAGMA_SUCCESS;
}
// check positive definite
if (den <= 0.0) {
printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
magmablasSetKernelStream( orig_queue );
return MAGMA_NONSPD;
solver_par->info = MAGMA_NONSPD;;
}
//Chronometry
real_Double_t tempo1, tempo2;
tempo1 = magma_sync_wtime( queue );
if ( solver_par->verbose > 0 ) {
solver_par->res_vec[0] = nom0;
solver_par->timing[0] = 0.0;
}
// start iteration
for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter;
solver_par->numiter++ ) {
rho_new = magma_cdotc( dofs, rr.dval, 1, r.dval, 1 ); // rho=<rr,r>
beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega
magma_cscal( dofs, beta, p.dval, 1 ); // p = beta*p
magma_caxpy( dofs, c_mone * omega * beta, v.dval, 1 , p.dval, 1 );
// p = p-omega*beta*v
magma_caxpy( dofs, c_one, r.dval, 1, p.dval, 1 ); // p = p+r
magma_c_spmv( c_one, A, p, c_zero, v, queue ); // v = Ap
alpha = rho_new / magma_cdotc( dofs, rr.dval, 1, v.dval, 1 );
magma_ccopy( dofs, r.dval, 1 , s.dval, 1 ); // s=r
magma_caxpy( dofs, c_mone * alpha, v.dval, 1 , s.dval, 1 ); // s=s-alpha*v
magma_c_spmv( c_one, A, s, c_zero, t, queue ); // t=As
omega = magma_cdotc( dofs, t.dval, 1, s.dval, 1 ) // omega = <s,t>/<t,t>
/ magma_cdotc( dofs, t.dval, 1, t.dval, 1 );
magma_caxpy( dofs, alpha, p.dval, 1 , x->dval, 1 ); // x=x+alpha*p
magma_caxpy( dofs, omega, s.dval, 1 , x->dval, 1 ); // x=x+omega*s
magma_ccopy( dofs, s.dval, 1 , r.dval, 1 ); // r=s
magma_caxpy( dofs, c_mone * omega, t.dval, 1 , r.dval, 1 ); // r=r-omega*t
res = betanom = magma_scnrm2( dofs, r.dval, 1 );
nom = betanom*betanom;
rho_old = rho_new; // rho_old=rho
if ( solver_par->verbose > 0 ) {
//.........这里部分代码省略.........
示例11: magma_chegvd_m
//.........这里部分代码省略.........
*info = 0;
if (itype < 1 || itype > 3) {
*info = -1;
} else if (! (wantz || lapackf77_lsame(jobz_, MagmaNoVecStr))) {
*info = -2;
} else if (! (lower || lapackf77_lsame(uplo_, MagmaUpperStr))) {
*info = -3;
} else if (n < 0) {
*info = -4;
} else if (lda < max(1,n)) {
*info = -6;
} else if (ldb < max(1,n)) {
*info = -8;
}
magma_int_t nb = magma_get_chetrd_nb( n );
if ( n <= 1 ) {
lwmin = 1;
lrwmin = 1;
liwmin = 1;
}
else if ( wantz ) {
lwmin = 2*n + n*n;
lrwmin = 1 + 5*n + 2*n*n;
liwmin = 3 + 5*n;
}
else {
lwmin = n + n*nb;
lrwmin = n;
liwmin = 1;
}
work[0] = MAGMA_C_MAKE( lwmin * (1. + lapackf77_slamch("Epsilon")), 0.); // round up
rwork[0] = lrwmin * (1. + lapackf77_slamch("Epsilon"));
iwork[0] = liwmin;
if (lwork < lwmin && ! lquery) {
*info = -11;
} else if (lrwork < lrwmin && ! lquery) {
*info = -13;
} else if (liwork < liwmin && ! lquery) {
*info = -15;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (n == 0) {
return *info;
}
/* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
if (n <= 128){
#ifdef ENABLE_DEBUG
printf("--------------------------------------------------------------\n");
printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
printf("--------------------------------------------------------------\n");
#endif
lapackf77_chegvd(&itype, jobz_, uplo_,
示例12: H
//.........这里部分代码省略.........
else if (! notran && trans != Magma_ConjTrans) {
*info = -3;
}
else if (m < 0) {
*info = -4;
}
else if (n < 0) {
*info = -5;
}
else if (k < 0) {
*info = -6;
}
else if ( ( applyq && lda < max(1,nq) ) ||
( ! applyq && lda < max(1,min(nq,k)) ) ) {
*info = -8;
}
else if (ldc < max(1,m)) {
*info = -11;
}
else if (lwork < max(1,nw) && ! lquery) {
*info = -13;
}
if (*info == 0) {
if (nw > 0) {
// TODO have get_cunmqr_nb and get_cunmlq_nb routines? see original LAPACK cunmbr.
// TODO make them dependent on m, n, and k?
nb = magma_get_cgebrd_nb( min( m, n ));
lwkopt = max(1, nw*nb);
}
else {
lwkopt = 1;
}
work[0] = MAGMA_C_MAKE( lwkopt, 0 );
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (m == 0 || n == 0) {
return *info;
}
if (applyq) {
/* Apply Q */
if (nq >= k) {
/* Q was determined by a call to CGEBRD with nq >= k */
#if VERSION == 1
lapackf77_cunmqr( lapack_side_const(side), lapack_trans_const(trans),
&m, &n, &k, A, &lda, tau, C, &ldc, work, &lwork, &iinfo);
#else
magma_cunmqr( side, trans,
m, n, k, A, lda, tau, C, ldc, work, lwork, &iinfo);
#endif
}
else if (nq > 1) {
/* Q was determined by a call to CGEBRD with nq < k */
if (left) {
mi = m - 1;
ni = n;
示例13: interval
//.........这里部分代码省略.........
if (valeig) {
if (n > 0 && vu <= vl) {
*info = -11;
}
} else if (indeig) {
if (il < 1 || il > max(1,n)) {
*info = -12;
} else if (iu < min(n,il) || iu > n) {
*info = -13;
}
}
}
magma_int_t nb = magma_get_chetrd_nb( n );
if ( n <= 1 ) {
lwmin = 1;
lrwmin = 1;
liwmin = 1;
}
else if ( wantz ) {
lwmin = 2*n + n*n;
lrwmin = 1 + 5*n + 2*n*n;
liwmin = 3 + 5*n;
}
else {
lwmin = n + n*nb;
lrwmin = n;
liwmin = 1;
}
// multiply by 1+eps (in Double!) to ensure length gets rounded up,
// if it cannot be exactly represented in floating point.
real_Double_t one_eps = 1. + lapackf77_slamch("Epsilon");
work[0] = MAGMA_C_MAKE( lwmin * one_eps, 0.); // round up
rwork[0] = lrwmin * one_eps;
iwork[0] = liwmin;
if (lwork < lwmin && ! lquery) {
*info = -17;
} else if (lrwork < lrwmin && ! lquery) {
*info = -19;
} else if (liwork < liwmin && ! lquery) {
*info = -21;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info));
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (n == 0) {
return *info;
}
/* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
if (n <= 128) {
#ifdef ENABLE_DEBUG
printf("--------------------------------------------------------------\n");
printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
printf("--------------------------------------------------------------\n");
#endif
lapackf77_chegvd(&itype, jobz_, uplo_,
示例14: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cher2k
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, cublas_perf, cublas_time, cpu_perf, cpu_time;
float cublas_error, Cnorm, work[1];
magma_int_t N, K;
magma_int_t Ak, An, Bk, Bn;
magma_int_t sizeA, sizeB, sizeC;
magma_int_t lda, ldb, ldc, ldda, lddb, lddc;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magmaFloatComplex *h_A, *h_B, *h_C, *h_Ccublas;
magmaFloatComplex *d_A, *d_B, *d_C;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex alpha = MAGMA_C_MAKE( 0.29, -0.86 );
float beta = MAGMA_D_MAKE( -0.48, 0.38 );
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
float tol = opts.tolerance * lapackf77_slamch("E");
printf("If running lapack (option --lapack), CUBLAS error is computed\n"
"relative to CPU BLAS result.\n\n");
printf("uplo = %s, transA = %s\n",
lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA) );
printf(" N K CUBLAS Gflop/s (ms) CPU Gflop/s (ms) CUBLAS error\n");
printf("==================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
N = opts.msize[itest];
K = opts.ksize[itest];
gflops = FLOPS_CHER2K(K, N) / 1e9;
if ( opts.transA == MagmaNoTrans ) {
lda = An = N;
Ak = K;
ldb = Bn = N;
Bk = K;
} else {
lda = An = K;
Ak = N;
ldb = Bn = K;
Bk = N;
}
ldc = N;
ldda = ((lda+31)/32)*32;
lddb = ((ldb+31)/32)*32;
lddc = ((ldc+31)/32)*32;
sizeA = lda*Ak;
sizeB = ldb*Ak;
sizeC = ldc*N;
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*Ak );
TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*Bk );
TESTING_MALLOC_CPU( h_C, magmaFloatComplex, ldc*N );
TESTING_MALLOC_CPU( h_Ccublas, magmaFloatComplex, ldc*N );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*Ak );
TESTING_MALLOC_DEV( d_B, magmaFloatComplex, lddb*Bk );
TESTING_MALLOC_DEV( d_C, magmaFloatComplex, lddc*N );
/* Initialize the matrices */
lapackf77_clarnv( &ione, ISEED, &sizeA, h_A );
lapackf77_clarnv( &ione, ISEED, &sizeB, h_B );
lapackf77_clarnv( &ione, ISEED, &sizeC, h_C );
/* =====================================================================
Performs operation using CUBLAS
=================================================================== */
magma_csetmatrix( An, Ak, h_A, lda, d_A, ldda );
magma_csetmatrix( Bn, Bk, h_B, ldb, d_B, lddb );
magma_csetmatrix( N, N, h_C, ldc, d_C, lddc );
cublas_time = magma_sync_wtime( NULL );
cublasCher2k( handle, cublas_uplo_const(opts.uplo), cublas_trans_const(opts.transA), N, K,
&alpha, d_A, ldda,
d_B, lddb,
&beta, d_C, lddc );
cublas_time = magma_sync_wtime( NULL ) - cublas_time;
cublas_perf = gflops / cublas_time;
magma_cgetmatrix( N, N, d_C, lddc, h_Ccublas, ldc );
/* =====================================================================
Performs operation using CPU BLAS
=================================================================== */
if ( opts.lapack ) {
cpu_time = magma_wtime();
blasf77_cher2k( lapack_uplo_const(opts.uplo), lapack_trans_const(opts.transA), &N, &K,
//.........这里部分代码省略.........
示例15: work
/**
Purpose
-------
CGEGQR orthogonalizes the N vectors given by a complex M-by-N matrix A:
A = Q * R.
On exit, if successful, the orthogonal vectors Q overwrite A
and R is given in work (on the CPU memory).
The routine is designed for tall-and-skinny matrices: M >> N, N <= 128.
This version uses normal equations and SVD in an iterative process that
makes the computation numerically accurate.
Arguments
---------
@param[in]
ikind INTEGER
Several versions are implemented indiceted by the ikind value:
1: This version uses normal equations and SVD in an iterative process
that makes the computation numerically accurate.
2: This version uses a standard LAPACK-based orthogonalization through
MAGMA's QR panel factorization (magma_cgeqr2x3_gpu) and magma_cungqr
3: MGS
4. Cholesky QR
@param[in]
m INTEGER
The number of rows of the matrix A. m >= n >= 0.
@param[in]
n INTEGER
The number of columns of the matrix A. 128 >= n >= 0.
@param[in,out]
dA COMPLEX array on the GPU, dimension (ldda,n)
On entry, the m-by-n matrix A.
On exit, the m-by-n matrix Q with orthogonal columns.
@param[in]
ldda INTEGER
The leading dimension of the array dA. LDDA >= max(1,m).
To benefit from coalescent memory accesses LDDA must be
divisible by 16.
@param
dwork (GPU workspace) COMPLEX array, dimension:
n^2 for ikind = 1
3 n^2 + min(m, n) for ikind = 2
0 (not used) for ikind = 3
n^2 for ikind = 4
@param[out]
work (CPU workspace) COMPLEX array, dimension 3 n^2.
On exit, work(1:n^2) holds the rectangular matrix R.
Preferably, for higher performance, work should be in pinned memory.
@param[out]
info INTEGER
- = 0: successful exit
- < 0: if INFO = -i, the i-th argument had an illegal value
or another error occured, such as memory allocation failed.
@ingroup magma_cgeqrf_comp
********************************************************************/
extern "C" magma_int_t
magma_cgegqr_gpu( magma_int_t ikind, magma_int_t m, magma_int_t n,
magmaFloatComplex *dA, magma_int_t ldda,
magmaFloatComplex *dwork, magmaFloatComplex *work,
magma_int_t *info )
{
#define work(i_,j_) (work + (i_) + (j_)*n)
#define dA(i_,j_) (dA + (i_) + (j_)*ldda)
magma_int_t i = 0, j, k, n2 = n*n;
magma_int_t ione = 1;
magmaFloatComplex c_zero = MAGMA_C_ZERO;
magmaFloatComplex c_one = MAGMA_C_ONE;
float cn = 200., mins, maxs;
/* check arguments */
*info = 0;
if (ikind < 1 || ikind > 4) {
*info = -1;
} else if (m < 0 || m < n) {
*info = -2;
} else if (n < 0 || n > 128) {
*info = -3;
} else if (ldda < max(1,m)) {
*info = -5;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
if (ikind == 1) {
// === Iterative, based on SVD ============================================================
magmaFloatComplex *U, *VT, *vt, *R, *G, *hwork, *tau;
//.........这里部分代码省略.........