本文整理汇总了C++中MAGMA_D_REAL函数的典型用法代码示例。如果您正苦于以下问题:C++ MAGMA_D_REAL函数的具体用法?C++ MAGMA_D_REAL怎么用?C++ MAGMA_D_REAL使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MAGMA_D_REAL函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: magma_dfrobenius
magma_int_t
magma_dfrobenius(
magma_d_matrix A,
magma_d_matrix B,
real_Double_t *res,
magma_queue_t queue ){
real_Double_t tmp2;
magma_int_t i,j,k;
*res = 0.0;
for(i=0; i<A.num_rows; i++){
for(j=A.row[i]; j<A.row[i+1]; j++){
magma_index_t localcol = A.col[j];
for( k=B.row[i]; k<B.row[i+1]; k++){
if(B.col[k] == localcol){
tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(A.val[j] )
- MAGMA_D_REAL(B.val[k]) );
(*res) = (*res) + tmp2* tmp2;
}
}
}
}
(*res) = sqrt((*res));
return MAGMA_SUCCESS;
}
示例2: magma_dprint_csr
magma_int_t
magma_dprint_csr(
magma_int_t n_row,
magma_int_t n_col,
magma_int_t nnz,
double **val,
magma_index_t **row,
magma_index_t **col,
magma_queue_t queue )
{
printf( "Matrix in CSR format (row col val)\n" );
printf( " %d %d %d\n", int(n_row), int(n_col), int(nnz) );
magma_index_t info = 0, i=0, j=0;
for(i=0; i < n_col; i++) {
magma_index_t rowtemp1 = (*row)[i];
magma_index_t rowtemp2 = (*row)[i+1];
for(j=0; j < rowtemp2 - rowtemp1; j++) {
printf(" %d %d %.2f\n", (rowtemp1+1), (*col)[rowtemp1+j]+1,
MAGMA_D_REAL((*val)[rowtemp1+j]) );
}
}
return info;
}
示例3: magma_d_isinf
/** @return true if either real(x) or imag(x) is INF. */
inline bool magma_d_isinf( double x )
{
#ifdef COMPLEX
return isinf( MAGMA_D_REAL( x )) ||
isinf( MAGMA_D_IMAG( x ));
#else
return isinf( x );
#endif
}
示例4: magma_dmake_hpd
void magma_dmake_hpd( magma_int_t N, double* A, magma_int_t lda )
{
magma_int_t i, j;
for( i=0; i<N; ++i ) {
A(i,i) = MAGMA_D_MAKE( MAGMA_D_REAL( A(i,i) ) + N, 0. );
for( j=0; j<i; ++j ) {
A(j,i) = MAGMA_D_CNJG( A(i,j) );
}
}
}
示例5: init_matrix
void init_matrix( magma_int_t N, double *h_A, magma_int_t lda )
{
magma_int_t ione = 1, n2 = N*lda;
magma_int_t ISEED[4] = {0,0,0,1};
lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
/* Symmetrize and increase the diagonal */
for (magma_int_t i = 0; i < N; ++i) {
h_A(i,i) = MAGMA_D_MAKE( MAGMA_D_REAL(h_A(i,i)) + N, 0 );
for (magma_int_t j = 0; j < i; ++j)
h_A(i, j) = MAGMA_D_CNJG( h_A(j, i) );
}
}
示例6: magma_dmdiff
extern "C" magma_int_t
magma_dmdiff(
magma_d_matrix A, magma_d_matrix B,
real_Double_t *res,
magma_queue_t queue )
{
magma_int_t info = 0;
if( A.memory_location == Magma_CPU && B.memory_location == Magma_CPU
&& A.storage_type == Magma_CSR && B.storage_type == Magma_CSR ){
real_Double_t tmp2;
magma_int_t i,j,k;
*res = 0.0;
for(i=0; i<A.num_rows; i++) {
for(j=A.row[i]; j<A.row[i+1]; j++) {
magma_index_t localcol = A.col[j];
for( k=B.row[i]; k<B.row[i+1]; k++) {
if (B.col[k] == localcol) {
tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(A.val[j] )
- MAGMA_D_REAL(B.val[k]) );
(*res) = (*res) + tmp2* tmp2;
}
}
}
}
(*res) = sqrt((*res));
}
else{
printf("error: mdiff only supported for CSR matrices on the CPU.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
return info;
}
示例7: magma_drowentries
extern "C" magma_int_t
magma_drowentries(
magma_d_matrix *A,
magma_queue_t queue )
{
magma_int_t info = 0;
magma_index_t *length=NULL;
magma_index_t i,j, maxrowlength=0;
// check whether matrix on CPU
if ( A->memory_location == Magma_CPU ) {
// CSR
if ( A->storage_type == Magma_CSR ) {
CHECK( magma_index_malloc_cpu( &length, A->num_rows));
for( i=0; i<A->num_rows; i++ ) {
length[i] = A->row[i+1]-A->row[i];
if (length[i] > maxrowlength)
maxrowlength = length[i];
}
A->max_nnz_row = maxrowlength;
}
// Dense
else if ( A->storage_type == Magma_DENSE ) {
CHECK( magma_index_malloc_cpu( &length, A->num_rows));
for( i=0; i<A->num_rows; i++ ) {
length[i] = 0;
for( j=0; j<A->num_cols; j++ ) {
if ( MAGMA_D_REAL( A->val[i*A->num_cols + j] ) != 0. )
length[i]++;
}
if (length[i] > maxrowlength)
maxrowlength = length[i];
}
A->max_nnz_row = maxrowlength;
}
} // end CPU case
else {
printf("error: matrix not on CPU.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
}
cleanup:
magma_free( length );
return info;
}
示例8: magma_dmalloc_cpu
double *generate_sym_matrix(int m) {
double *a;
magma_int_t i,j;
magma_int_t mm = m*m;
magma_err_t err;
err = magma_dmalloc_cpu ( &a , mm );
magma_int_t ione = 1;
magma_int_t ISEED [4] = {0 ,0 ,0 ,1};
lapackf77_dlarnv (&ione ,ISEED ,&mm ,a);
for(i=0; i<m; i++) {
MAGMA_D_SET2REAL (a[i*m+i],( MAGMA_D_REAL (a[i*m+i ])+1.* m ) );
for (j=0; j<i; j++)
a[i*m+j] = (a[j*m+i]);
}
return a;
}
示例9: magma_dprint
void magma_dprint( int m, int n, double *A, int lda )
{
double c_zero = MAGMA_D_ZERO;
printf( "[\n" );
for( int i = 0; i < m; ++i ) {
for( int j = 0; j < n; ++j ) {
if ( MAGMA_D_EQUAL( *A(i,j), c_zero )) {
printf( " 0. " );
}
else {
printf( " %8.4f", MAGMA_D_REAL( *A(i,j) ));
}
}
printf( "\n" );
}
printf( "];\n" );
}
示例10: GeneratePlaneRotation
static void
GeneratePlaneRotation(double dx, double dy, double *cs, double *sn)
{
#if defined(PRECISION_s) | defined(PRECISION_d)
if (dy == MAGMA_D_ZERO) {
*cs = MAGMA_D_ONE;
*sn = MAGMA_D_ZERO;
} else if (MAGMA_D_ABS((dy)) > MAGMA_D_ABS((dx))) {
double temp = dx / dy;
*sn = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp));
*cs = temp * (*sn);
} else {
double temp = dy / dx;
*cs = MAGMA_D_ONE / magma_dsqrt( ( MAGMA_D_ONE + temp*temp ));
*sn = temp * (*cs);
}
#else
// below the code Joss Knight from MathWorks provided me with - this works.
// No idea why the above code fails for real - maybe rounding.
real_Double_t rho = sqrt(MAGMA_D_REAL(MAGMA_D_CONJ(dx)*dx + MAGMA_D_CONJ(dy)*dy));
*cs = dx / rho;
*sn = dy / rho;
#endif
}
示例11: magma_dcg_merge
magma_int_t
magma_dcg_merge( magma_d_sparse_matrix A, magma_d_vector b, magma_d_vector *x,
magma_d_solver_par *solver_par ){
// prepare solver feedback
solver_par->solver = Magma_CGMERGE;
solver_par->numiter = 0;
solver_par->info = 0;
// some useful variables
double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
magma_int_t dofs = A.num_rows;
// GPU stream
magma_queue_t stream[2];
magma_event_t event[1];
magma_queue_create( &stream[0] );
magma_queue_create( &stream[1] );
magma_event_create( &event[0] );
// GPU workspace
magma_d_vector r, d, z;
magma_d_vinit( &r, Magma_DEV, dofs, c_zero );
magma_d_vinit( &d, Magma_DEV, dofs, c_zero );
magma_d_vinit( &z, Magma_DEV, dofs, c_zero );
double *d1, *d2, *skp;
magma_dmalloc( &d1, dofs*(1) );
magma_dmalloc( &d2, dofs*(1) );
// array for the parameters
magma_dmalloc( &skp, 6 ); // skp = [alpha|beta|gamma|rho|tmp1|tmp2]
// solver variables
double alpha, beta, gamma, rho, tmp1, *skp_h;
double nom, nom0, r0, betanom, den;
// solver setup
magma_dscal( dofs, c_zero, x->val, 1) ; // x = 0
magma_dcopy( dofs, b.val, 1, r.val, 1 ); // r = b
magma_dcopy( dofs, b.val, 1, d.val, 1 ); // d = b
nom0 = betanom = magma_dnrm2( dofs, r.val, 1 );
nom = nom0 * nom0; // nom = r' * r
magma_d_spmv( c_one, A, d, c_zero, z ); // z = A d
den = MAGMA_D_REAL( magma_ddot(dofs, d.val, 1, z.val, 1) ); // den = d'* z
solver_par->init_res = nom0;
// array on host for the parameters
magma_dmalloc_cpu( &skp_h, 6 );
alpha = rho = gamma = tmp1 = c_one;
beta = magma_ddot(dofs, r.val, 1, r.val, 1);
skp_h[0]=alpha;
skp_h[1]=beta;
skp_h[2]=gamma;
skp_h[3]=rho;
skp_h[4]=tmp1;
skp_h[5]=MAGMA_D_MAKE(nom, 0.0);
magma_dsetvector( 6, skp_h, 1, skp, 1 );
if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
r0 = ATOLERANCE;
if ( nom < r0 )
return MAGMA_SUCCESS;
// check positive definite
if (den <= 0.0) {
printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
return -100;
}
//Chronometry
real_Double_t tempo1, tempo2;
magma_device_sync(); tempo1=magma_wtime();
if( solver_par->verbose > 0 ){
solver_par->res_vec[0] = (real_Double_t) nom0;
solver_par->timing[0] = 0.0;
}
// start iteration
for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter;
solver_par->numiter++ ){
magmablasSetKernelStream(stream[0]);
// computes SpMV and dot product
magma_dcgmerge_spmv1( A, d1, d2, d.val, z.val, skp );
// updates x, r, computes scalars and updates d
magma_dcgmerge_xrbeta( dofs, d1, d2, x->val, r.val, d.val, z.val, skp );
// check stopping criterion (asynchronous copy)
magma_dgetvector_async( 1 , skp+1, 1,
skp_h+1, 1, stream[1] );
betanom = sqrt(MAGMA_D_REAL(skp_h[1]));
if( solver_par->verbose > 0 ){
magma_device_sync(); tempo2=magma_wtime();
if( (solver_par->numiter)%solver_par->verbose==0 ) {
solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
//.........这里部分代码省略.........
示例12: magma_dpbicgstab
extern "C" magma_int_t
magma_dpbicgstab(
magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
magma_d_solver_par *solver_par,
magma_d_preconditioner *precond_par,
magma_queue_t queue )
{
magma_int_t info = 0;
// set queue for old dense routines
magma_queue_t orig_queue=NULL;
magmablasGetKernelStream( &orig_queue );
// prepare solver feedback
solver_par->solver = Magma_PBICGSTAB;
solver_par->numiter = 0;
solver_par->info = MAGMA_SUCCESS;
// some useful variables
double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE,
c_mone = MAGMA_D_NEG_ONE;
magma_int_t dofs = A.num_rows*b.num_cols;
// workspace
magma_d_matrix r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}, ms={Magma_CSR}, mt={Magma_CSR}, y={Magma_CSR}, z={Magma_CSR};
CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &rr,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &ms,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &mt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
// solver variables
double alpha, beta, omega, rho_old, rho_new;
double nom, betanom, nom0, r0, den, res;
// solver setup
CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue));
magma_dcopy( dofs, r.dval, 1, rr.dval, 1 ); // rr = r
betanom = nom0;
nom = nom0*nom0;
rho_new = omega = alpha = MAGMA_D_MAKE( 1.0, 0. );
solver_par->init_res = nom0;
CHECK( magma_d_spmv( c_one, A, r, c_zero, v, queue )); // z = A r
den = MAGMA_D_REAL( magma_ddot(dofs, v.dval, 1, r.dval, 1) ); // den = z' * r
if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
r0 = ATOLERANCE;
if ( nom < r0 ) {
solver_par->final_res = solver_par->init_res;
solver_par->iter_res = solver_par->init_res;
goto cleanup;
}
//Chronometry
real_Double_t tempo1, tempo2;
tempo1 = magma_sync_wtime( queue );
if ( solver_par->verbose > 0 ) {
solver_par->res_vec[0] = nom0;
solver_par->timing[0] = 0.0;
}
solver_par->numiter = 0;
// start iteration
do
{
solver_par->numiter++;
rho_old = rho_new; // rho_old=rho
rho_new = magma_ddot( dofs, rr.dval, 1, r.dval, 1 ); // rho=<rr,r>
beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega
magma_dscal( dofs, beta, p.dval, 1 ); // p = beta*p
magma_daxpy( dofs, c_mone * omega * beta, v.dval, 1 , p.dval, 1 );
// p = p-omega*beta*v
magma_daxpy( dofs, c_one, r.dval, 1, p.dval, 1 ); // p = p+r
// preconditioner
CHECK( magma_d_applyprecond_left( A, p, &mt, precond_par, queue ));
CHECK( magma_d_applyprecond_right( A, mt, &y, precond_par, queue ));
CHECK( magma_d_spmv( c_one, A, y, c_zero, v, queue )); // v = Ap
alpha = rho_new / magma_ddot( dofs, rr.dval, 1, v.dval, 1 );
magma_dcopy( dofs, r.dval, 1 , s.dval, 1 ); // s=r
magma_daxpy( dofs, c_mone * alpha, v.dval, 1 , s.dval, 1 ); // s=s-alpha*v
// preconditioner
CHECK( magma_d_applyprecond_left( A, s, &ms, precond_par, queue ));
CHECK( magma_d_applyprecond_right( A, ms, &z, precond_par, queue ));
CHECK( magma_d_spmv( c_one, A, z, c_zero, t, queue )); // t=As
// preconditioner
CHECK( magma_d_applyprecond_left( A, s, &ms, precond_par, queue ));
//.........这里部分代码省略.........
示例13: magma_dcg
extern "C" magma_int_t
magma_dcg(
magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
magma_d_solver_par *solver_par,
magma_queue_t queue )
{
magma_int_t info = 0;
// set queue for old dense routines
magma_queue_t orig_queue=NULL;
magmablasGetKernelStream( &orig_queue );
// prepare solver feedback
solver_par->solver = Magma_CG;
solver_par->numiter = 0;
solver_par->info = MAGMA_SUCCESS;
// local variables
double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
magma_int_t dofs = A.num_rows * b.num_cols;
// GPU workspace
magma_d_matrix r={Magma_CSR}, p={Magma_CSR}, q={Magma_CSR};
CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
// solver variables
double alpha, beta;
double nom, nom0, r0, betanom, betanomsq, den;
// solver setup
CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue));
magma_dcopy( dofs, r.dval, 1, p.dval, 1 ); // p = r
betanom = nom0;
nom = nom0 * nom0; // nom = r' * r
CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue )); // q = A p
den = MAGMA_D_REAL( magma_ddot(dofs, p.dval, 1, q.dval, 1) );// den = p dot q
solver_par->init_res = nom0;
if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE )
r0 = ATOLERANCE;
if ( nom < r0 ) {
solver_par->final_res = solver_par->init_res;
solver_par->iter_res = solver_par->init_res;
goto cleanup;
}
// check positive definite
if (den <= 0.0) {
printf("Operator A is not postive definite. (Ar,r) = %f\n", den);
magmablasSetKernelStream( orig_queue );
info = MAGMA_NONSPD;
goto cleanup;
}
//Chronometry
real_Double_t tempo1, tempo2;
tempo1 = magma_sync_wtime( queue );
if ( solver_par->verbose > 0 ) {
solver_par->res_vec[0] = (real_Double_t)nom0;
solver_par->timing[0] = 0.0;
}
solver_par->numiter = 0;
// start iteration
do
{
solver_par->numiter++;
alpha = MAGMA_D_MAKE(nom/den, 0.);
magma_daxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p
magma_daxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q
betanom = magma_dnrm2(dofs, r.dval, 1); // betanom = || r ||
betanomsq = betanom * betanom; // betanoms = r' * r
if ( solver_par->verbose > 0 ) {
tempo2 = magma_sync_wtime( queue );
if ( (solver_par->numiter)%solver_par->verbose==0 ) {
solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
= (real_Double_t) betanom;
solver_par->timing[(solver_par->numiter)/solver_par->verbose]
= (real_Double_t) tempo2-tempo1;
}
}
if ( betanom < r0 ) {
break;
}
beta = MAGMA_D_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom
magma_dscal(dofs, beta, p.dval, 1); // p = beta*p
magma_daxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r
CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue )); // q = A p
den = MAGMA_D_REAL(magma_ddot(dofs, p.dval, 1, q.dval, 1));
// den = p dot q
nom = betanomsq;
}
while ( solver_par->numiter+1 <= solver_par->maxiter );
tempo2 = magma_sync_wtime( queue );
//.........这里部分代码省略.........
示例14: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing dgeqrf_mgpu
*/
int main( int argc, char** argv )
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
double error, work[1];
double c_neg_one = MAGMA_D_NEG_ONE;
double *h_A, *h_R, *tau, *h_work, tmp[1];
double *d_lA[ MagmaMaxGPUs ];
magma_int_t M, N, n2, lda, ldda, n_local, ngpu;
magma_int_t info, min_mn, nb, lhwork;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1}, ISEED2[4];
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= (opts.check == 2); // check (-c2) implies lapack (-l)
magma_int_t status = 0;
double tol, eps = lapackf77_dlamch("E");
tol = opts.tolerance * eps;
printf("ngpu %d\n", (int) opts.ngpu );
if ( opts.check == 1 ) {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R-Q'A||_1 / (M*||A||_1) ||I-Q'Q||_1 / M\n");
printf("================================================================================================\n");
} else {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F /(M*||A||_F)\n");
printf("==========================================================================\n");
}
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[i];
N = opts.nsize[i];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
nb = magma_get_dgeqrf_nb( M );
gflops = FLOPS_DGEQRF( M, N ) / 1e9;
// ngpu must be at least the number of blocks
ngpu = min( opts.ngpu, int((N+nb-1)/nb) );
if ( ngpu < opts.ngpu ) {
printf( " * too many GPUs for the matrix size, using %d GPUs\n", (int) ngpu );
}
// query for workspace size
lhwork = -1;
lapackf77_dgeqrf( &M, &N, h_A, &M, tau, tmp, &lhwork, &info );
lhwork = (magma_int_t) MAGMA_D_REAL( tmp[0] );
// Allocate host memory for the matrix
TESTING_MALLOC( tau, double, min_mn );
TESTING_MALLOC( h_A, double, n2 );
TESTING_HOSTALLOC( h_R, double, n2 );
TESTING_MALLOC( h_work, double, lhwork );
// Allocate device memory
for( int dev = 0; dev < ngpu; dev++ ) {
n_local = ((N/nb)/ngpu)*nb;
if (dev < (N/nb) % ngpu)
n_local += nb;
else if (dev == (N/nb) % ngpu)
n_local += N % nb;
magma_setdevice( dev );
TESTING_DEVALLOC( d_lA[dev], double, ldda*n_local );
}
/* Initialize the matrix */
for ( int j=0; j<4; j++ ) ISEED2[j] = ISEED[j]; // saving seeds
lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
lapackf77_dlacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
if ( opts.lapack ) {
double *tau;
TESTING_MALLOC( tau, double, min_mn );
cpu_time = magma_wtime();
lapackf77_dgeqrf( &M, &N, h_A, &M, tau, h_work, &lhwork, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapack_dgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
TESTING_FREE( tau );
}
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_dsetmatrix_1D_col_bcyclic( M, N, h_R, lda, d_lA, ldda, ngpu, nb );
gpu_time = magma_wtime();
//.........这里部分代码省略.........
示例15: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing dgels
*/
int main( int argc, char** argv )
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
double gpu_error, cpu_error, error, Anorm, work[1];
double c_one = MAGMA_D_ONE;
double c_neg_one = MAGMA_D_NEG_ONE;
double *h_A, *h_A2, *h_B, *h_X, *h_R, *tau, *h_work, tmp[1];
magmaDouble_ptr d_A, d_B;
magma_int_t M, N, size, nrhs, lda, ldb, ldda, lddb, min_mn, max_mn, nb, info;
magma_int_t lworkgpu, lhwork;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
opts.parse_opts( argc, argv );
magma_int_t status = 0;
double tol = opts.tolerance * lapackf77_dlamch("E");
nrhs = opts.nrhs;
printf("%% ||b-Ax|| / (N||A||) ||dx-x||/(N||A||)\n");
printf("%% M N NRHS CPU Gflop/s (sec) GPU Gflop/s (sec) CPU GPU \n");
printf("%%==================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
if ( M < N ) {
printf( "%5d %5d %5d skipping because M < N is not yet supported.\n", (int) M, (int) N, (int) nrhs );
continue;
}
min_mn = min(M, N);
max_mn = max(M, N);
lda = M;
ldb = max_mn;
ldda = magma_roundup( M, opts.align ); // multiple of 32 by default
lddb = magma_roundup( max_mn, opts.align ); // multiple of 32 by default
nb = magma_get_dgeqrf_nb( M, N );
gflops = (FLOPS_DGEQRF( M, N ) + FLOPS_DGEQRS( M, N, nrhs )) / 1e9;
lworkgpu = (M - N + nb)*(nrhs + nb) + nrhs*nb;
// query for workspace size
lhwork = -1;
lapackf77_dgels( MagmaNoTransStr, &M, &N, &nrhs,
NULL, &lda, NULL, &ldb, tmp, &lhwork, &info );
lhwork = (magma_int_t) MAGMA_D_REAL( tmp[0] );
lhwork = max( lhwork, lworkgpu );
TESTING_MALLOC_CPU( tau, double, min_mn );
TESTING_MALLOC_CPU( h_A, double, lda*N );
TESTING_MALLOC_CPU( h_A2, double, lda*N );
TESTING_MALLOC_CPU( h_B, double, ldb*nrhs );
TESTING_MALLOC_CPU( h_X, double, ldb*nrhs );
TESTING_MALLOC_CPU( h_R, double, ldb*nrhs );
TESTING_MALLOC_CPU( h_work, double, lhwork );
TESTING_MALLOC_DEV( d_A, double, ldda*N );
TESTING_MALLOC_DEV( d_B, double, lddb*nrhs );
/* Initialize the matrices */
size = lda*N;
lapackf77_dlarnv( &ione, ISEED, &size, h_A );
lapackf77_dlacpy( MagmaFullStr, &M, &N, h_A, &lda, h_A2, &lda );
// make random RHS
size = ldb*nrhs;
lapackf77_dlarnv( &ione, ISEED, &size, h_B );
lapackf77_dlacpy( MagmaFullStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
// make consistent RHS
//size = N*nrhs;
//lapackf77_dlarnv( &ione, ISEED, &size, h_X );
//blasf77_dgemm( MagmaNoTransStr, MagmaNoTransStr, &M, &nrhs, &N,
// &c_one, h_A, &lda,
// h_X, &ldb,
// &c_zero, h_B, &ldb );
//lapackf77_dlacpy( MagmaFullStr, &M, &nrhs, h_B, &ldb, h_R, &ldb );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
magma_dsetmatrix( M, N, h_A, lda, d_A, ldda );
magma_dsetmatrix( M, nrhs, h_B, ldb, d_B, lddb );
gpu_time = magma_wtime();
magma_dgels_gpu( MagmaNoTrans, M, N, nrhs, d_A, ldda,
d_B, lddb, h_work, lworkgpu, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0) {
printf("magma_dgels_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
}
//.........这里部分代码省略.........