本文整理汇总了C++中MAGMA_C_REAL函数的典型用法代码示例。如果您正苦于以下问题：C++ MAGMA_C_REAL函数的具体用法？C++ MAGMA_C_REAL怎么用？C++ MAGMA_C_REAL使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了MAGMA_C_REAL函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: magma_c_isinf
/**
 * Tests a value for infinity.
 *
 * When COMPLEX is defined, the real and imaginary parts of x are checked
 * separately; otherwise x itself is handed to isinf().
 *
 * @param x  value to test
 * @return   true if the real part or the imaginary part of x is INF
 */
inline bool magma_c_isinf( magmaFloatComplex x )
{
#ifdef COMPLEX
    // The real part decides first; the imaginary part is inspected only
    // when the real part is not infinite.
    if ( isinf( MAGMA_C_REAL( x )) ) {
        return true;
    }
    return isinf( MAGMA_C_IMAG( x ));
#else
    return isinf( x );
#endif
}
示例2: magma_cmake_hpd
/**
 * Modifies the N-by-N matrix A in place:
 *   - every diagonal entry becomes (real part of the old entry) + N with a
 *     zero imaginary part;
 *   - for every j < i, A(j,i) is overwritten with the conjugate of A(i,j).
 *
 * NOTE(review): the name suggests the goal is a Hermitian positive definite
 * matrix; that property is not checked here.
 *
 * @param N    number of rows and columns processed
 * @param A    matrix storage, accessed through the A(i,j) macro
 * @param lda  leading dimension; presumably consumed by the A(i,j) macro,
 *             which is defined outside this block -- confirm
 */
void magma_cmake_hpd( magma_int_t N, magmaFloatComplex* A, magma_int_t lda )
{
    for( magma_int_t row = 0; row < N; ++row ) {
        // Shift the diagonal by N and drop its imaginary part.
        A(row,row) = MAGMA_C_MAKE( MAGMA_C_REAL( A(row,row) ) + N, 0. );

        // Mirror the entries A(row, 0..row-1) across the diagonal, conjugated.
        for( magma_int_t col = 0; col < row; ++col ) {
            A(col,row) = MAGMA_C_CNJG( A(row,col) );
        }
    }
}
示例3: init_matrix
void init_matrix( int N, magmaFloatComplex *h_A, magma_int_t lda )
{
magma_int_t ione = 1, n2 = N*lda;
magma_int_t ISEED[4] = {0,0,0,1};
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
/* Symmetrize and increase the diagonal */
for (int i = 0; i < N; ++i) {
MAGMA_C_SET2REAL( h_A(i,i), MAGMA_C_REAL(h_A(i,i)) + N );
for (int j = 0; j < i; ++j) h_A(i, j) = MAGMA_C_CNJG( h_A(j, i) );
}
}
示例4: magma_cprint
/**
 * Prints the m-by-n matrix A to stdout between "[" and "];".
 *
 * Entries equal to MAGMA_C_ZERO are printed as " 0. ". All other entries are
 * printed with "%8.4f" formatting; the imaginary part is included only when
 * PRECISION_z or PRECISION_c is defined. A one-row matrix is printed on a
 * single line; otherwise each row ends with a newline.
 *
 * If magma_is_devptr( A ) reports 1, an error is written to stderr and the
 * process terminates with exit(1).
 *
 * @param m    number of rows to print
 * @param n    number of columns to print
 * @param A    matrix storage, accessed through the A(i,j) macro
 * @param lda  leading dimension; presumably consumed by the A(i,j) macro,
 *             which is defined outside this block -- confirm
 */
void magma_cprint( magma_int_t m, magma_int_t n, const magmaFloatComplex *A, magma_int_t lda )
{
    if ( magma_is_devptr( A ) == 1 ) {
        fprintf( stderr, "ERROR: cprint called with device pointer.\n" );
        exit(1);
    }

    magmaFloatComplex c_zero = MAGMA_C_ZERO;

    // Opening bracket: a single row stays on one line.
    if ( m != 1 ) {
        printf( "[\n" );
    }
    else {
        printf( "[ " );
    }

    for( int row = 0; row < m; ++row ) {
        for( int col = 0; col < n; ++col ) {
            // Exact zeros get a short placeholder instead of a formatted number.
            if ( MAGMA_C_EQUAL( *A(row,col), c_zero )) {
                printf( " 0. " );
                continue;
            }
#if defined(PRECISION_z) || defined(PRECISION_c)
            printf( " %8.4f+%8.4fi", MAGMA_C_REAL( *A(row,col) ), MAGMA_C_IMAG( *A(row,col) ));
#else
            printf( " %8.4f", MAGMA_C_REAL( *A(row,col) ));
#endif
        }

        // Row terminator: newline for multi-row output, a space otherwise.
        if ( m <= 1 ) {
            printf( " " );
        }
        else {
            printf( "\n" );
        }
    }

    printf( "];\n" );
}
示例5: magma_cmdiff
/**
 * Computes a difference measure between two CSR matrices stored on the CPU.
 *
 * For every row i and every entry of A in that row, the entries of B in the
 * same row with the same column index are located; the squared difference
 * of the REAL parts is accumulated into *res, and the square root of the sum
 * is stored in *res at the end. Imaginary parts are not compared, and
 * entries of A without a matching column in B contribute nothing.
 *
 * @param A      first matrix  (must be Magma_CSR on Magma_CPU)
 * @param B      second matrix (must be Magma_CSR on Magma_CPU)
 * @param res    output; written only when both matrices are supported
 * @param queue  not used in this function
 * @return       0 on success, MAGMA_ERR_NOT_SUPPORTED for any other
 *               storage type or memory location (a message is printed)
 */
extern "C" magma_int_t
magma_cmdiff(
    magma_c_matrix A, magma_c_matrix B,
    real_Double_t *res,
    magma_queue_t queue )
{
    // Guard: anything other than CPU-resident CSR on both sides is rejected.
    if ( A.memory_location != Magma_CPU || B.memory_location != Magma_CPU
         || A.storage_type != Magma_CSR || B.storage_type != Magma_CSR ) {
        printf("error: mdiff only supported for CSR matrices on the CPU.\n");
        return MAGMA_ERR_NOT_SUPPORTED;
    }

    *res = 0.0;
    for( magma_int_t r = 0; r < A.num_rows; r++ ) {
        for( magma_int_t ia = A.row[r]; ia < A.row[r+1]; ia++ ) {
            magma_index_t wanted_col = A.col[ia];
            // Scan row r of B for entries sharing A's column index.
            for( magma_int_t ib = B.row[r]; ib < B.row[r+1]; ib++ ) {
                if ( B.col[ib] != wanted_col ) {
                    continue;
                }
                real_Double_t delta = (real_Double_t) fabs( MAGMA_C_REAL(A.val[ia] )
                                                    - MAGMA_C_REAL(B.val[ib]) );
                *res += delta * delta;
            }
        }
    }
    *res = sqrt( *res );

    return 0;
}
示例6: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cheevd
*/
int main( int argc, char** argv)
{
TESTING_CUDA_INIT();
cuFloatComplex *h_A, *h_R, *h_work;
float *rwork, *w1, *w2;
magma_int_t *iwork;
float gpu_time, cpu_time;
magma_timestr_t start, end;
/* Matrix size */
magma_int_t N=0, n2;
magma_int_t size[8] = {1024,2048,3072,4032,5184,6016,7040,8064};
magma_int_t i, info;
magma_int_t ione = 1, izero = 0;
magma_int_t ISEED[4] = {0,0,0,1};
const char *uplo = MagmaLowerStr;
const char *jobz = MagmaVectorsStr;
magma_int_t checkres;
float result[3], eps = lapackf77_slamch( "E" );
if (argc != 1){
for(i = 1; i<argc; i++){
if (strcmp("-N", argv[i])==0) {
N = atoi(argv[++i]);
}
else if ( strcmp("-JV", argv[i]) == 0 ) {
jobz = MagmaVectorsStr;
}
else if ( strcmp("-JN", argv[i]) == 0 ) {
jobz = MagmaNoVectorsStr;
}
}
if (N>0)
printf(" testing_cheevd -N %d [-JV] [-JN]\n\n", (int) N);
else {
printf("\nUsage: \n");
printf(" testing_cheevd -N %d [-JV] [-JN]\n\n", (int) N);
exit(1);
}
}
else {
printf("\nUsage: \n");
printf(" testing_cheevd -N %d [-JV] [-JN]\n\n", 1024);
N = size[7];
}
checkres = getenv("MAGMA_TESTINGS_CHECK") != NULL;
if ( checkres && jobz[0] == MagmaNoVectors ) {
printf( "Cannot check results when vectors are not computed (jobz='N')\n" );
checkres = false;
}
/* Query for workspace sizes */
cuFloatComplex aux_work[1];
float aux_rwork[1];
magma_int_t aux_iwork[1];
magma_cheevd( jobz[0], uplo[0],
N, h_R, N, w1,
aux_work, -1,
aux_rwork, -1,
aux_iwork, -1,
&info );
magma_int_t lwork, lrwork, liwork;
lwork = (magma_int_t) MAGMA_C_REAL( aux_work[0] );
lrwork = (magma_int_t) aux_rwork[0];
liwork = aux_iwork[0];
/* Allocate host memory for the matrix */
TESTING_MALLOC( h_A, cuFloatComplex, N*N );
TESTING_MALLOC( w1, float , N );
TESTING_MALLOC( w2, float , N );
TESTING_HOSTALLOC( h_R, cuFloatComplex, N*N );
TESTING_HOSTALLOC( h_work, cuFloatComplex, lwork );
TESTING_MALLOC( rwork, float, lrwork );
TESTING_MALLOC( iwork, magma_int_t, liwork );
printf(" N CPU Time(s) GPU Time(s) \n");
printf("===================================\n");
for(i=0; i<8; i++){
if (argc==1){
N = size[i];
}
n2 = N*N;
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
for( int i=0; i<N; i++) {
MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) );
}
lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
/* warm up run */
//.........这里部分代码省略.........
示例7: main
//.........这里部分代码省略.........
printf("===============================================================================================\n");
for( int i = 0; i < ntest; ++i ) {
for( int iside = 0; iside < 2; ++iside ) {
for( int itran = 0; itran < 2; ++itran ) {
m = msize[i];
n = nsize[i];
k = ksize[i];
if ( *side[iside] == 'L' && m < k ) {
printf( "%5d %5d %5d %-5s %-9s skipping because side=left and m < k\n",
(int) m, (int) n, (int) k, side[iside], trans[itran] );
continue;
}
if ( *side[iside] == 'R' && n < k ) {
printf( "%5d %5d %5d %-5s %-9s skipping because side=right and n < k\n",
(int) m, (int) n, (int) k, side[iside], trans[itran] );
continue;
}
gflops = FLOPS_CUNMQR( m, n, k, *side[iside] ) / 1e9;
// C is full, m x n
size = ldc*n;
lapackf77_clarnv( &ione, iseed, &size, C );
lapackf77_clacpy( "Full", &m, &n, C, &ldc, R, &ldc );
//magma_csetmatrix( m, n, C, ldc, dC, ldc );
// A is m x k (left) or n x k (right)
lda = (*side[iside] == 'L' ? m : n);
size = lda*k;
lapackf77_clarnv( &ione, iseed, &size, A );
// compute QR factorization to get Householder vectors in A, tau
magma_cgeqrf( lda, k, A, lda, tau, W, lwork_max, &info );
if ( info != 0 )
printf("magma_cgeqrf returned error %d\n", info);
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_cunmqr( side[iside], trans[itran],
&m, &n, &k,
A, &lda, tau, C, &ldc, W, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cunmqr returned error %d.\n", (int) info);
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for work size
lwork = -1;
magma_cunmqr( *side[iside], *trans[itran],
m, n, k,
A, lda, tau, R, ldc, W, lwork, &info );
if (info != 0)
printf("magma_cunmqr returned error %d (lwork query).\n", (int) info);
lwork = (magma_int_t) MAGMA_C_REAL( W[0] );
if ( lwork < 0 || lwork > lwork_max )
printf("invalid lwork %d, lwork_max %d\n", lwork, lwork_max );
gpu_time = magma_wtime();
magma_cunmqr( *side[iside], *trans[itran],
m, n, k,
A, lda, tau, R, ldc, W, lwork, &info );
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cunmqr returned error %d.\n", (int) info);
//magma_cgetmatrix( m, n, dC, ldc, R, ldc );
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
error = lapackf77_clange( "Fro", &m, &n, C, &ldc, work );
size = ldc*n;
blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione );
error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / error;
printf( "%5d %5d %5d %-5s %-9s %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
(int) m, (int) n, (int) k, side[iside], trans[itran],
cpu_perf, cpu_time, gpu_perf, gpu_time, error );
}} // end iside, itran
printf( "\n" );
} // end i
// Memory clean up
TESTING_FREE( C );
TESTING_FREE( R );
TESTING_FREE( A );
TESTING_FREE( W );
TESTING_FREE( tau );
// Shutdown
TESTING_CUDA_FINALIZE();
return 0;
}
示例8: magma_cheevdx_2stage_m
//.........这里部分代码省略.........
liwmin = 5 * n + 3;
} else {
lwmin = lq2 + n * (nb + 1);
lrwmin = n;
liwmin = 1;
}
work[0] = MAGMA_C_MAKE( lwmin * (1. + lapackf77_slamch("Epsilon")), 0.); // round up
rwork[0] = lrwmin * (1. + lapackf77_slamch("Epsilon"));
iwork[0] = liwmin;
if ((lwork < lwmin) && !lquery) {
*info = -14;
} else if ((lrwork < lrwmin) && ! lquery) {
*info = -16;
} else if ((liwork < liwmin) && ! lquery) {
*info = -18;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery) {
return *info;
}
/* Quick return if possible */
if (n == 0) {
return *info;
}
if (n == 1) {
w[0] = MAGMA_C_REAL(a[0]);
if (wantz) {
a[0] = MAGMA_C_ONE;
}
return *info;
}
#ifdef ENABLE_DEBUG
printf("using %d threads\n", threads);
#endif
/* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */
magma_int_t ntiles = n/nb;
if( ( ntiles < 2 ) || ( n <= 128 ) ){
#ifdef ENABLE_DEBUG
printf("--------------------------------------------------------------\n");
printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb);
printf("--------------------------------------------------------------\n");
#endif
lapackf77_cheevd(jobz_, uplo_, &n,
a, &lda, w,
work, &lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
rwork, &lrwork,
#endif
iwork, &liwork,
info);
*m = n;
return *info;
}
/* Get machine constants. */
safmin = lapackf77_slamch("Safe minimum");
示例9: main
//.........这里部分代码省略.........
TESTING_FREE_CPU( col );
free(pntre);
#endif // MAGMA_WITH_MKL
// copy matrix to GPU
magma_c_mtransfer( hA, &dA, Magma_CPU, Magma_DEV, queue );
// SpMV on GPU (CSR) -- this is the reference!
start = magma_sync_wtime( queue );
for (j=0; j<10; j++)
magma_c_spmv( c_one, dA, dx, c_zero, dy, queue );
end = magma_sync_wtime( queue );
printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard CSR).\n",
(end-start)/10, FLOPS*10/(end-start) );
magma_c_mfree(&dA, queue );
magma_c_vtransfer( dy, &hrefvec , Magma_DEV, Magma_CPU, queue );
// convert to ELL and copy to GPU
magma_c_mconvert( hA, &hA_ELL, Magma_CSR, Magma_ELL, queue );
magma_c_mtransfer( hA_ELL, &dA_ELL, Magma_CPU, Magma_DEV, queue );
magma_c_mfree(&hA_ELL, queue );
magma_c_vfree( &dy, queue );
magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
// SpMV on GPU (ELL)
start = magma_sync_wtime( queue );
for (j=0; j<10; j++)
magma_c_spmv( c_one, dA_ELL, dx, c_zero, dy, queue );
end = magma_sync_wtime( queue );
printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (standard ELL).\n",
(end-start)/10, FLOPS*10/(end-start) );
magma_c_mfree(&dA_ELL, queue );
magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
res = 0.0;
for(magma_int_t k=0; k<hA.num_rows; k++ )
res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
if ( res < .000001 )
printf("# tester spmv ELL: ok\n");
else
printf("# tester spmv ELL: failed\n");
magma_c_vfree( &hcheck, queue );
// convert to SELLP and copy to GPU
magma_c_mconvert( hA, &hA_SELLP, Magma_CSR, Magma_SELLP, queue );
magma_c_mtransfer( hA_SELLP, &dA_SELLP, Magma_CPU, Magma_DEV, queue );
magma_c_mfree(&hA_SELLP, queue );
magma_c_vfree( &dy, queue );
magma_c_vinit( &dy, Magma_DEV, hA.num_rows, c_zero, queue );
// SpMV on GPU (SELLP)
start = magma_sync_wtime( queue );
for (j=0; j<10; j++)
magma_c_spmv( c_one, dA_SELLP, dx, c_zero, dy, queue );
end = magma_sync_wtime( queue );
printf( " > MAGMA: %.2e seconds %.2e GFLOP/s (SELLP).\n",
(end-start)/10, FLOPS*10/(end-start) );
magma_c_vtransfer( dy, &hcheck , Magma_DEV, Magma_CPU, queue );
res = 0.0;
for(magma_int_t k=0; k<hA.num_rows; k++ )
res=res + MAGMA_C_REAL(hcheck.val[k]) - MAGMA_C_REAL(hrefvec.val[k]);
printf("# |x-y|_F = %8.2e\n", res);
if ( res < .000001 )
printf("# tester spmv SELL-P: ok\n");
else
printf("# tester spmv SELL-P: failed\n");
magma_c_vfree( &hcheck, queue );
magma_c_mfree(&dA_SELLP, queue );
示例10: magma_cmgenerator
/**
 * Generates an n-by-n banded matrix from a list of diagonals and returns it
 * in CSR format in *A.
 *
 * The matrix is first assembled in a temporary ELLPACKT structure with
 * 2*offdiags+1 slots per row: the off-diagonals are placed symmetrically to
 * the left and right of the main diagonal, using the same value on both
 * sides. Slots whose column index falls outside [0, n) are zeroed, and the
 * structure is then converted to CSR with magma_cmconvert.
 *
 * @param n            number of rows and columns
 * @param offdiags     number of off-diagonals on each side of the main one
 * @param diag_offset  offsets of the off-diagonals; entries 1..offdiags are
 *                     read (entry 0 is not read by this function)
 * @param diag_vals    values of the diagonals; entry 0 is the main diagonal,
 *                     entries 1..offdiags belong to the off-diagonals
 * @param A            output matrix, filled by magma_cmconvert
 * @param queue        queue forwarded to magma_cmconvert / magma_cmfree
 * @return             0 on success, otherwise the error code propagated
 *                     through CHECK
 */
magma_int_t
magma_cmgenerator(
    magma_int_t n,
    magma_int_t offdiags,
    magma_index_t *diag_offset,
    magmaFloatComplex *diag_vals,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_c_matrix B={Magma_CSR};

    // Null every pointer member up front so that magma_cmfree in the cleanup
    // path is safe even if an allocation below fails.
    B.val = NULL;
    B.col = NULL;
    B.row = NULL;
    B.rowidx = NULL;
    B.blockinfo = NULL;
    B.diag = NULL;
    B.dval = NULL;
    B.dcol = NULL;
    B.drow = NULL;
    B.drowidx = NULL;
    B.ddiag = NULL;
    B.list = NULL;
    B.dlist = NULL;

    B.num_rows = n;
    B.num_cols = n;
    B.fill_mode = MagmaFull;
    B.memory_location = Magma_CPU;
    B.storage_type = Magma_ELLPACKT;
    B.max_nnz_row = (2*offdiags+1);

    CHECK( magma_cmalloc_cpu( &B.val, B.max_nnz_row*n ));
    CHECK( magma_index_malloc_cpu( &B.col, B.max_nnz_row*n ));

    for( int i=0; i<n; i++ ) { // stride over rows
        // stride over the number of nonzeros in each row
        // left of diagonal
        for( int j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j ] = diag_vals[ offdiags - j ];
            B.col[ i*B.max_nnz_row + j ] = -1 * diag_offset[ offdiags-j ] + i;
        }
        // elements on the diagonal
        B.val[ i*B.max_nnz_row + offdiags ] = diag_vals[ 0 ];
        B.col[ i*B.max_nnz_row + offdiags ] = i;
        // right of diagonal
        for( int j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j + offdiags +1 ] = diag_vals[ j+1 ];
            B.col[ i*B.max_nnz_row + j + offdiags +1 ] = diag_offset[ j+1 ] + i;
        }
    }

    // set invalid entries (column index outside the matrix) to zero
    for( int i=0; i<n; i++ ) { // stride over rows
        for( int j=0; j<B.max_nnz_row; j++ ) { // nonzeros in every row
            if ( (B.col[i*B.max_nnz_row + j] < 0) ||
                 (B.col[i*B.max_nnz_row + j] >= n) ) {
                B.val[ i*B.max_nnz_row + j ] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    // Count the stored entries; note that only the real part is tested, so a
    // purely imaginary value is not counted.
    B.nnz = 0;
    for( int i=0; i<n; i++ ) { // stride over rows
        for( int j=0; j<B.max_nnz_row; j++ ) { // nonzeros in every row
            if ( MAGMA_C_REAL( B.val[i*B.max_nnz_row + j]) != 0.0 )
                B.nnz++;
        }
    }
    B.true_nnz = B.nnz;

    // converting it to CSR will remove the invalid entries completely
    CHECK( magma_cmconvert( B, A, Magma_ELLPACKT, Magma_CSR, queue ));

cleanup:
    // B is only a staging buffer for the conversion, so release it on every
    // path. Previously it was freed only on failure, which leaked B.val and
    // B.col on each successful call.
    // NOTE(review): this relies on magma_cmconvert giving *A its own storage
    // rather than aliasing B's arrays -- confirm against magma_cmconvert.
    magma_cmfree( &B, queue );
    return info;
}
示例11: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgegqr
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
float e1, e2, e3, e4, e5, *work;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex c_one = MAGMA_C_ONE;
magmaFloatComplex c_zero = MAGMA_C_ZERO;
magmaFloatComplex *h_A, *h_R, *tau, *dtau, *h_work, *h_rwork, tmp[1];
magmaFloatComplex *d_A, *dwork;
magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn;
magma_int_t ione = 1, ldwork;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t status = 0;
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
// versions 1...4 are valid
if (opts.version < 1 || opts.version > 4) {
printf("Unknown version %d; exiting\n", opts.version );
return -1;
}
float tol, eps = lapackf77_slamch("E");
tol = 10* opts.tolerance * eps;
printf(" M N CPU GFlop/s (ms) GPU GFlop/s (ms) ||I-Q'Q||_F / M ||I-Q'Q||_I / M ||A-Q R||_I\n");
printf(" MAGMA / LAPACK MAGMA / LAPACK\n");
printf("==========================================================================================================\n");
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
if (N > 128) {
printf("%5d %5d skipping because cgegqr requires N <= 128\n",
(int) M, (int) N);
continue;
}
if (M < N) {
printf("%5d %5d skipping because cgegqr requires M >= N\n",
(int) M, (int) N);
continue;
}
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
gflops = FLOPS_CGEQRF( M, N ) / 1e9 + FLOPS_CUNGQR( M, N, N ) / 1e9;
// query for workspace size
lwork = -1;
lapackf77_cgeqrf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info);
lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] );
lwork = max(lwork, 3*N*N);
ldwork = N*N;
if (opts.version == 2) {
ldwork = 3*N*N + min_mn;
}
TESTING_MALLOC_PIN( tau, magmaFloatComplex, min_mn );
TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork );
TESTING_MALLOC_PIN(h_rwork, magmaFloatComplex, lwork );
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
TESTING_MALLOC_CPU( h_R, magmaFloatComplex, n2 );
TESTING_MALLOC_CPU( work, float, M );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
TESTING_MALLOC_DEV( dtau, magmaFloatComplex, min_mn );
TESTING_MALLOC_DEV( dwork, magmaFloatComplex, ldwork );
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
// warmup
magma_cgegqr_gpu( 1, M, N, d_A, ldda, dwork, h_work, &info );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_sync_wtime( 0 );
magma_cgegqr_gpu( opts.version, M, N, d_A, ldda, dwork, h_rwork, &info );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgegqr returned error %d: %s.\n",
//.........这里部分代码省略.........
示例12: main
//.........这里部分代码省略.........
TESTING_MALLOC_DEV( dC, magmaFloatComplex, ldc*n );
TESTING_MALLOC_DEV( dA, magmaFloatComplex, lda*k );
TESTING_MALLOC_DEV( dT, magmaFloatComplex, dt_size );
// C is full, m x n
size = ldc*n;
lapackf77_clarnv( &ione, ISEED, &size, C );
magma_csetmatrix( m, n, C, ldc, dC, ldc );
// A is m x k (left) or n x k (right)
lda = (side[iside] == MagmaLeft ? m : n);
size = lda*k;
lapackf77_clarnv( &ione, ISEED, &size, A );
// compute QR factorization to get Householder vectors in dA, tau, dT
magma_csetmatrix( lda, k, A, lda, dA, lda );
magma_cgeqrf_gpu( lda, k, dA, lda, tau, dT, &info );
magma_cgetmatrix( lda, k, dA, lda, A, lda );
if (info != 0)
printf("magma_cgeqrf_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_cunmqr( lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ),
&m, &n, &k,
A, &lda, tau, C, &ldc, W, &lwork_max, &info );
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cunmqr returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
// query for workspace size
lwork = -1;
magma_cunmqr_gpu( side[iside], trans[itran],
m, n, k,
dA, lda, tau, dC, ldc, W, lwork, dT, nb, &info );
if (info != 0)
printf("magma_cunmqr_gpu (lwork query) returned error %d: %s.\n",
(int) info, magma_strerror( info ));
lwork = (magma_int_t) MAGMA_C_REAL( W[0] );
if ( lwork < 0 || lwork > lwork_max )
printf("invalid lwork %d, lwork_max %d\n", (int) lwork, (int) lwork_max );
gpu_time = magma_sync_wtime( 0 ); // sync needed for L,N and R,T cases
magma_cunmqr_gpu( side[iside], trans[itran],
m, n, k,
dA, lda, tau, dC, ldc, W, lwork, dT, nb, &info );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cunmqr_gpu returned error %d: %s.\n",
(int) info, magma_strerror( info ));
magma_cgetmatrix( m, n, dC, ldc, R, ldc );
/* =====================================================================
compute relative error |QC_magma - QC_lapack| / |QC_lapack|
=================================================================== */
error = lapackf77_clange( "Fro", &m, &n, C, &ldc, work );
size = ldc*n;
blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione );
error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / error;
printf( "%5d %5d %5d %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n",
(int) m, (int) n, (int) k,
lapacke_side_const( side[iside] ),
lapacke_trans_const( trans[itran] ),
cpu_perf, cpu_time, gpu_perf, gpu_time,
error, (error < tol ? "ok" : "failed") );
status += ! (error < tol);
TESTING_FREE_CPU( C );
TESTING_FREE_CPU( R );
TESTING_FREE_CPU( A );
TESTING_FREE_CPU( W );
TESTING_FREE_CPU( tau );
TESTING_FREE_DEV( dC );
TESTING_FREE_DEV( dA );
TESTING_FREE_DEV( dT );
fflush( stdout );
}
if ( opts.niter > 1 ) {
printf( "\n" );
}
}} // end iside, itran
printf( "\n" );
}
TESTING_FINALIZE();
return status;
}
示例13: dimension
//.........这里部分代码省略.........
A(0, 0), lda, e, tau,
work, ldwork,
dA, ldda, 0,
dwork, i+ib,
dwork2, ldwork2,
1, dx, dy, hwork,
stream, times);
magma_cher2k_mgpu(num_gpus, MagmaUpper, MagmaNoTrans, nb, i, ib,
c_neg_one, dwork, i+ib, 0,
d_one, dA, ldda, 0,
num_streams, stream);
/* get the next panel */
if (i-nb >= nx ) {
ib = min(nb, n-(i-nb));
ii = nb*((i-nb)/(nb*num_gpus));
did = ((i-nb)/nb)%num_gpus;
magma_setdevice(did);
magma_cgetmatrix_async( (i-nb)+ib, ib,
dA(did, 0, ii), ldda,
A(0, i-nb), lda,
stream[did][0] );
}
/* Copy superdiagonal elements back into A, and diagonal
elements into D */
for (j = i; j < i+ib; ++j) {
if ( j > 0 ) {
*A(j-1,j) = MAGMA_C_MAKE( e[j - 1], 0 );
}
d[j] = MAGMA_C_REAL( *A(j, j) );
}
} /* end of for i=... */
if ( nx > 0 ) {
if (1 <= n-nx) { /* else A is already on CPU */
for (i=0; i < nx; i += nb) {
ib = min(nb, n-i);
ii = nb*(i/(nb*num_gpus));
did = (i/nb)%num_gpus;
magma_setdevice(did);
magma_cgetmatrix_async( nx, ib,
dA(did, 0, ii), ldda,
A(0, i), lda,
stream[did][0] );
}
}
for( did=0; did < num_gpus; did++ ) {
magma_setdevice(did);
magma_queue_sync(stream[did][0]);
}
/* Use unblocked code to reduce the last or only block */
lapackf77_chetd2(uplo_, &nx, A(0, 0), &lda, d, e, tau, &iinfo);
}
}
else {
trace_init( 1, num_gpus, num_streams, (CUstream_st**)stream );
/* Copy the matrix to the GPU */
if (1 <= n-nx) {
magma_chtodhe(num_gpus, uplo, n, nb, A, lda, dA, ldda, stream, &iinfo );
}
示例14: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgeqrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_R, *tau, *h_work, tmp[1];
magmaFloatComplex_ptr d_A, dT;
magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn, nb, size;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1}, ISEED2[4];
magma_opts opts;
parse_opts( argc, argv, &opts );
magma_int_t status = 0;
float tol;
opts.lapack |= (opts.version == 2 && opts.check == 2); // check (-c2) implies lapack (-l)
if ( opts.version != 2 && opts.check == 1 ) {
printf( "NOTE: version %d requires -c2 check due to the special structure of the\n"
"MAGMA cgeqrf results; using -c2.\n\n", (int) opts.version );
opts.check = 2;
}
printf( "version %d\n", (int) opts.version );
if ( opts.version == 2 ) {
if ( opts.check == 1 ) {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R-Q'A||_1 / (M*||A||_1*eps) ||I-Q'Q||_1 / (M*eps)\n");
printf("=========================================================================================================\n");
} else {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||A||_F\n");
printf("=======================================================================\n");
}
tol = 1.0;
} else {
printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||Ax-b||_F/(N*||A||_F*||x||_F)\n");
printf("====================================================================================\n");
tol = opts.tolerance * lapackf77_slamch("E");
}
for( int itest = 0; itest < opts.ntest; ++itest ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[itest];
N = opts.nsize[itest];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
gflops = FLOPS_CGEQRF( M, N ) / 1e9;
// query for workspace size
lwork = -1;
lapackf77_cgeqrf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info);
lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] );
TESTING_MALLOC_CPU( tau, magmaFloatComplex, min_mn );
TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 );
TESTING_MALLOC_CPU( h_work, magmaFloatComplex, lwork );
TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 );
TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
/* Initialize the matrix */
for ( int j=0; j<4; j++ )
ISEED2[j] = ISEED[j]; // save seeds
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
magma_csetmatrix( M, N, h_R, lda, d_A, 0, ldda, opts.queue );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_wtime();
if ( opts.version == 2 ) {
magma_cgeqrf2_gpu( M, N, d_A, 0, ldda, tau, opts.queues2, &info );
}
else {
nb = magma_get_cgeqrf_nb( M );
size = (2*min(M, N) + (N+31)/32*32 )*nb;
TESTING_MALLOC_DEV( dT, magmaFloatComplex, size );
if ( opts.version == 1 ) {
magma_cgeqrf_gpu( M, N, d_A, 0, ldda, tau, dT, 0, opts.queue, &info );
}
#ifdef HAVE_CUBLAS
else if ( opts.version == 3 ) {
magma_cgeqrf3_gpu( M, N, d_A, 0, ldda, tau, dT, opts.queue, &info );
}
#endif
else {
printf( "Unknown version %d\n", opts.version );
exit(1);
}
}
gpu_time = magma_wtime() - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
//.........这里部分代码省略.........
示例15: main
/* ////////////////////////////////////////////////////////////////////////////
-- Testing cgeqrf
*/
int main( int argc, char** argv)
{
TESTING_INIT();
real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
float error, work[1];
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex *h_A, *h_R, *tau, *dtau, *h_work, tmp[1];
magmaFloatComplex *d_A;
float *dwork;
magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn;
magma_int_t ione = 1;
magma_int_t ISEED[4] = {0,0,0,1};
magma_opts opts;
parse_opts( argc, argv, &opts );
opts.lapack |= opts.check; // check (-c) implies lapack (-l)
printf(" M N CPU GFlop/s (ms) GPU GFlop/s (ms) ||R||_F / ||A||_F\n");
printf("=======================================================================\n");
for( int i = 0; i < opts.ntest; ++i ) {
for( int iter = 0; iter < opts.niter; ++iter ) {
M = opts.msize[i];
N = opts.nsize[i];
min_mn = min(M, N);
lda = M;
n2 = lda*N;
ldda = ((M+31)/32)*32;
gflops = FLOPS_CGEQRF( M, N ) / 1e9;
lwork = -1;
lapackf77_cgeqrf(&M, &N, h_A, &M, tau, tmp, &lwork, &info);
lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] );
TESTING_MALLOC( tau, magmaFloatComplex, min_mn );
TESTING_MALLOC( h_A, magmaFloatComplex, n2 );
TESTING_HOSTALLOC( h_R, magmaFloatComplex, n2 );
TESTING_DEVALLOC( d_A, magmaFloatComplex, ldda*N );
TESTING_DEVALLOC( dtau, magmaFloatComplex, min_mn );
TESTING_DEVALLOC(dwork, float, min_mn );
TESTING_MALLOC( h_work, magmaFloatComplex, lwork );
/* Initialize the matrix */
lapackf77_clarnv( &ione, ISEED, &n2, h_A );
lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
// warmup
magma_cgeqr2_gpu( M, N, d_A, ldda, dtau, dwork, &info );
magma_csetmatrix( M, N, h_R, lda, d_A, ldda );
/* ====================================================================
Performs operation using MAGMA
=================================================================== */
gpu_time = magma_sync_wtime( 0 );
magma_cgeqr2_gpu( M, N, d_A, ldda, dtau, dwork, &info );
gpu_time = magma_sync_wtime( 0 ) - gpu_time;
gpu_perf = gflops / gpu_time;
if (info != 0)
printf("magma_cgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
if ( opts.lapack ) {
/* =====================================================================
Performs operation using LAPACK
=================================================================== */
cpu_time = magma_wtime();
lapackf77_cgeqrf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info);
cpu_time = magma_wtime() - cpu_time;
cpu_perf = gflops / cpu_time;
if (info != 0)
printf("lapackf77_cgeqrf returned error %d: %s.\n",
(int) info, magma_strerror( info ));
/* =====================================================================
Check the result compared to LAPACK
=================================================================== */
magma_cgetmatrix( M, N, d_A, ldda, h_R, M );
error = lapackf77_clange("f", &M, &N, h_A, &lda, work);
blasf77_caxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
error = lapackf77_clange("f", &M, &N, h_R, &lda, work) / error;
printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n",
(int) M, (int) N, cpu_perf, 1000.*cpu_time, gpu_perf, 1000.*gpu_time, error );
}
else {
printf("%5d %5d --- ( --- ) %7.2f (%7.2f) --- \n",
(int) M, (int) N, gpu_perf, 1000.*gpu_time );
}
TESTING_FREE( tau );
TESTING_FREE( h_A );
TESTING_FREE( h_work );
TESTING_HOSTFREE( h_R );
//.........这里部分代码省略.........