本文整理汇总了C++中magma_queue_t类的典型用法代码示例。如果您正苦于以下问题:C++ magma_queue_t类的具体用法?C++ magma_queue_t怎么用?C++ magma_queue_t使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了magma_queue_t类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: magma_copyvector_async
/***************************************************************************//**
    @fn magma_copyvector_async( n, elemSize, dx_src, incx, dy_dst, incy, queue )

    Copy vector dx_src on GPU device to dy_dst on GPU device.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.
    With CUDA unified addressing, dx and dy can be on different GPUs.

    This version is asynchronous: it may return before the transfer finishes.
    See magma_copyvector() for a synchronous version.

    Contiguous vectors (incx == 1 and incy == 1) are copied with a single
    cudaMemcpyAsync; strided vectors are forwarded to
    magma_copymatrix_async_internal as a 1-by-n matrix.

    @param[in]
    n           Number of elements in vector.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dx_src      Source array of dimension at least 1 + (n-1)*incx, on GPU device.

    @param[in]
    incx        Increment between elements of dx_src. incx > 0.

    @param[out]
    dy_dst      Destination array of dimension at least 1 + (n-1)*incy, on GPU device.

    @param[in]
    incy        Increment between elements of dy_dst. incy > 0.

    @param[in]
    queue       Queue to execute in. NULL is accepted (with a warning on
                stderr) and means the NULL stream.

    @ingroup magma_copyvector
*******************************************************************************/
extern "C" void
magma_copyvector_async_internal(
    magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dx_src, magma_int_t incx,
    magma_ptr dy_dst, magma_int_t incy,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // for backwards compatibility, accepts NULL queue to mean NULL stream.
    cudaStream_t stream = NULL;
    if ( queue != NULL ) {
        stream = queue->cuda_stream();
    }
    else {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    if ( incx == 1 && incy == 1 ) {
        // The byte count is formed in size_t (the type cudaMemcpyAsync takes),
        // so vectors of 2 GiB or more are not truncated by a narrowing to int.
        cudaError_t status;
        status = cudaMemcpyAsync(
            dy_dst,
            dx_src,
            size_t(n)*size_t(elemSize), cudaMemcpyDeviceToDevice, stream );
        check_xerror( status, func, file, line );
    }
    else {
        // Strided case: treat the vector as a 1-by-n matrix whose leading
        // dimension is the increment.
        magma_copymatrix_async_internal(
            1, n, elemSize, dx_src, incx, dy_dst, incy, queue, func, file, line );
    }
}
示例2: magma_copymatrix
/***************************************************************************//**
    @fn magma_copymatrix( m, n, elemSize, dA_src, ldda, dB_dst, lddb, queue )

    Copy all or part of matrix dA_src on GPU device to dB_dst on GPU device.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.
    With CUDA unified addressing, dA and dB can be on different GPUs.

    This version synchronizes the queue after the transfer.
    See magma_copymatrix_async() for an asynchronous version.

    @param[in]
    m           Number of rows of matrix A. m >= 0.

    @param[in]
    n           Number of columns of matrix A. n >= 0.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dA_src      Source array of dimension (ldda,n), on GPU device.

    @param[in]
    ldda        Leading dimension of matrix A. ldda >= m.

    @param[out]
    dB_dst      Destination array of dimension (lddb,n), on GPU device.

    @param[in]
    lddb        Leading dimension of matrix B. lddb >= m.

    @param[in]
    queue       Queue to execute in. Must not be NULL.

    @ingroup magma_copymatrix
*******************************************************************************/
extern "C" void
magma_copymatrix_q_internal(
    magma_int_t m, magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dA_src, magma_int_t ldda,
    magma_ptr dB_dst, magma_int_t lddb,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    assert( queue != NULL );
    // Pitches and row width are byte counts; form them in size_t (the type
    // cudaMemcpy2DAsync takes) so large matrices do not overflow an int.
    cudaError_t status;
    status = cudaMemcpy2DAsync(
        dB_dst, size_t(lddb)*size_t(elemSize),
        dA_src, size_t(ldda)*size_t(elemSize),
        size_t(m)*size_t(elemSize), size_t(n), cudaMemcpyDeviceToDevice, queue->cuda_stream() );
    // Always drain the queue, as before. If the copy was enqueued cleanly,
    // report whatever the synchronization returns instead of dropping it.
    cudaError_t sync_status;
    sync_status = cudaStreamSynchronize( queue->cuda_stream() );
    if ( status == cudaSuccess ) {
        status = sync_status;
    }
    check_xerror( status, func, file, line );
}
示例3: magma_getmatrix
/***************************************************************************//**
    @fn magma_getmatrix( m, n, elemSize, dA_src, ldda, hB_dst, ldb, queue )

    Copy all or part of matrix dA_src on GPU device to hB_dst on CPU host.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.

    This version synchronizes the queue after the transfer.
    See magma_getmatrix_async() for an asynchronous version.

    @param[in]
    m           Number of rows of matrix A. m >= 0.

    @param[in]
    n           Number of columns of matrix A. n >= 0.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dA_src      Source array of dimension (ldda,n), on GPU device.

    @param[in]
    ldda        Leading dimension of matrix A. ldda >= m.

    @param[out]
    hB_dst      Destination array of dimension (ldb,n), on CPU host.

    @param[in]
    ldb         Leading dimension of matrix B. ldb >= m.

    @param[in]
    queue       Queue to execute in. Must not be NULL.

    @ingroup magma_getmatrix
*******************************************************************************/
extern "C" void
magma_getmatrix_q_internal(
    magma_int_t m, magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dA_src, magma_int_t ldda,
    void* hB_dst, magma_int_t ldb,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    assert( queue != NULL );
    cublasStatus_t status;
    status = cublasGetMatrixAsync(
        int(m), int(n), int(elemSize),
        dA_src, int(ldda),
        hB_dst, int(ldb), queue->cuda_stream() );
    // The transfer only completes at the synchronization point, so its
    // result is captured and reported too rather than being discarded.
    cudaError_t sync_status;
    sync_status = cudaStreamSynchronize( queue->cuda_stream() );
    check_xerror( status, func, file, line );
    check_xerror( sync_status, func, file, line );
}
示例4: magma_copyvector_q_internal
// Copy vector dx_src on GPU device to dy_dst on GPU device, then synchronize
// the queue. Contiguous vectors (incx == 1 and incy == 1) use one
// cudaMemcpyAsync; strided vectors are forwarded to
// magma_copymatrix_q_internal as a 1-by-n matrix. queue must not be NULL.
//
// TODO compare performance with cublasZcopy BLAS function.
// But this implementation can handle any element size, not just [sdcz] precisions.
extern "C" void
magma_copyvector_q_internal(
    magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dx_src, magma_int_t incx,
    magma_ptr dy_dst, magma_int_t incy,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    assert( queue != NULL );
    if ( incx == 1 && incy == 1 ) {
        // The byte count is formed in size_t (the type cudaMemcpyAsync takes),
        // so vectors of 2 GiB or more are not truncated by a narrowing to int.
        cudaError_t status;
        status = cudaMemcpyAsync(
            dy_dst,
            dx_src,
            size_t(n)*size_t(elemSize), cudaMemcpyDeviceToDevice, queue->cuda_stream() );
        // Always drain the queue, as before. If the copy was enqueued cleanly,
        // report whatever the synchronization returns instead of dropping it.
        cudaError_t sync_status;
        sync_status = cudaStreamSynchronize( queue->cuda_stream() );
        if ( status == cudaSuccess ) {
            status = sync_status;
        }
        check_xerror( status, func, file, line );
    }
    else {
        // Strided case: treat the vector as a 1-by-n matrix whose leading
        // dimension is the increment.
        magma_copymatrix_q_internal(
            1, n, elemSize, dx_src, incx, dy_dst, incy, queue, func, file, line );
    }
}
示例5: magma_getvector
/***************************************************************************//**
    @fn magma_getvector( n, elemSize, dx_src, incx, hy_dst, incy, queue )

    Copy vector dx_src on GPU device to hy_dst on CPU host.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.

    This version synchronizes the queue after the transfer.
    See magma_getvector_async() for an asynchronous version.

    @param[in]
    n           Number of elements in vector.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dx_src      Source array of dimension at least 1 + (n-1)*incx, on GPU device.

    @param[in]
    incx        Increment between elements of dx_src. incx > 0.

    @param[out]
    hy_dst      Destination array of dimension at least 1 + (n-1)*incy, on CPU host.

    @param[in]
    incy        Increment between elements of hy_dst. incy > 0.

    @param[in]
    queue       Queue to execute in. Must not be NULL.

    @ingroup magma_getvector
*******************************************************************************/
extern "C" void
magma_getvector_q_internal(
    magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dx_src, magma_int_t incx,
    void* hy_dst, magma_int_t incy,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // Same precondition as the other synchronous copy routines: queue is
    // dereferenced unconditionally below.
    assert( queue != NULL );
    cublasStatus_t status;
    status = cublasGetVectorAsync(
        int(n), int(elemSize),
        dx_src, int(incx),
        hy_dst, int(incy), queue->cuda_stream() );
    // The transfer only completes at the synchronization point, so its
    // result is captured and reported too rather than being discarded.
    cudaError_t sync_status;
    sync_status = cudaStreamSynchronize( queue->cuda_stream() );
    check_xerror( status, func, file, line );
    check_xerror( sync_status, func, file, line );
}
示例6: magma_getmatrix_async
/***************************************************************************//**
    @fn magma_getmatrix_async( m, n, elemSize, dA_src, ldda, hB_dst, ldb, queue )

    Asynchronously copy all or part of the device matrix dA_src into the host
    matrix hB_dst. Elements may be of arbitrary size; the type-safe wrappers
    supply elemSize.

    The call may return before the transfer finishes, if hB_dst is pinned
    CPU memory. See magma_getmatrix() for a synchronous version.

    @param[in]
    m           Number of rows of matrix A. m >= 0.

    @param[in]
    n           Number of columns of matrix A. n >= 0.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dA_src      Source array of dimension (ldda,n), on GPU device.

    @param[in]
    ldda        Leading dimension of matrix A. ldda >= m.

    @param[out]
    hB_dst      Destination array of dimension (ldb,n), on CPU host.

    @param[in]
    ldb         Leading dimension of matrix B. ldb >= m.

    @param[in]
    queue       Queue to execute in. NULL is accepted (with a warning on
                stderr) and means the NULL stream.

    @ingroup magma_getmatrix
*******************************************************************************/
extern "C" void
magma_getmatrix_async_internal(
    magma_int_t m, magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dA_src, magma_int_t ldda,
    void* hB_dst, magma_int_t ldb,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // Pick the stream: the queue's own stream, or the NULL stream (with a
    // warning) when no queue was supplied.
    cudaStream_t stream = NULL;
    if ( queue == NULL ) {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    else {
        stream = queue->cuda_stream();
    }

    // Dimensions are narrowed to int for the cuBLAS call.
    const int rows      = int(m);
    const int cols      = int(n);
    const int elem_size = int(elemSize);
    const int src_ld    = int(ldda);
    const int dst_ld    = int(ldb);

    cublasStatus_t status = cublasGetMatrixAsync(
        rows, cols, elem_size, dA_src, src_ld, hB_dst, dst_ld, stream );
    check_xerror( status, func, file, line );
}
示例7: magma_getvector_async
/***************************************************************************//**
    @fn magma_getvector_async( n, elemSize, dx_src, incx, hy_dst, incy, queue )

    Asynchronously copy the device vector dx_src into the host vector hy_dst.
    Elements may be of arbitrary size; the type-safe wrappers supply elemSize.

    The call may return before the transfer finishes, if hy_dst is pinned
    CPU memory. See magma_getvector() for a synchronous version.

    @param[in]
    n           Number of elements in vector.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dx_src      Source array of dimension at least 1 + (n-1)*incx, on GPU device.

    @param[in]
    incx        Increment between elements of dx_src. incx > 0.

    @param[out]
    hy_dst      Destination array of dimension at least 1 + (n-1)*incy, on CPU host.

    @param[in]
    incy        Increment between elements of hy_dst. incy > 0.

    @param[in]
    queue       Queue to execute in. NULL is accepted (with a warning on
                stderr) and means the NULL stream.

    @ingroup magma_getvector
*******************************************************************************/
extern "C" void
magma_getvector_async_internal(
    magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dx_src, magma_int_t incx,
    void* hy_dst, magma_int_t incy,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // A NULL queue is still accepted for backwards compatibility; it selects
    // the NULL stream and prints a warning.
    cudaStream_t stream = NULL;
    if ( queue == NULL ) {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    else {
        stream = queue->cuda_stream();
    }

    // Sizes and increments are narrowed to int for the cuBLAS call.
    const int count     = int(n);
    const int elem_size = int(elemSize);
    const int src_inc   = int(incx);
    const int dst_inc   = int(incy);

    cublasStatus_t status = cublasGetVectorAsync(
        count, elem_size, dx_src, src_inc, hy_dst, dst_inc, stream );
    check_xerror( status, func, file, line );
}
示例8: magma_copymatrix_async
/***************************************************************************//**
    @fn magma_copymatrix_async( m, n, elemSize, dA_src, ldda, dB_dst, lddb, queue )

    Copy all or part of matrix dA_src on GPU device to dB_dst on GPU device.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.
    With CUDA unified addressing, dA and dB can be on different GPUs.

    This version is asynchronous: it may return before the transfer finishes.
    See magma_copymatrix() for a synchronous version.

    @param[in]
    m           Number of rows of matrix A. m >= 0.

    @param[in]
    n           Number of columns of matrix A. n >= 0.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dA_src      Source array of dimension (ldda,n), on GPU device.

    @param[in]
    ldda        Leading dimension of matrix A. ldda >= m.

    @param[out]
    dB_dst      Destination array of dimension (lddb,n), on GPU device.

    @param[in]
    lddb        Leading dimension of matrix B. lddb >= m.

    @param[in]
    queue       Queue to execute in. NULL is accepted (with a warning on
                stderr) and means the NULL stream.

    @ingroup magma_copymatrix
*******************************************************************************/
extern "C" void
magma_copymatrix_async_internal(
    magma_int_t m, magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dA_src, magma_int_t ldda,
    magma_ptr dB_dst, magma_int_t lddb,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // for backwards compatibility, accepts NULL queue to mean NULL stream.
    cudaStream_t stream = NULL;
    if ( queue != NULL ) {
        stream = queue->cuda_stream();
    }
    else {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    // Pitches and row width are byte counts; form them in size_t (the type
    // cudaMemcpy2DAsync takes) so large matrices do not overflow an int.
    cudaError_t status;
    status = cudaMemcpy2DAsync(
        dB_dst, size_t(lddb)*size_t(elemSize),
        dA_src, size_t(ldda)*size_t(elemSize),
        size_t(m)*size_t(elemSize), size_t(n), cudaMemcpyDeviceToDevice, stream );
    check_xerror( status, func, file, line );
}
示例9: magma_setmatrix_async
/***************************************************************************//**
    @fn magma_setmatrix_async( m, n, elemSize, hA_src, lda, dB_dst, lddb, queue )

    Asynchronously copy all or part of the host matrix hA_src into the device
    matrix dB_dst. Elements may be of arbitrary size; the type-safe wrappers
    supply elemSize.

    The call may return before the transfer finishes, if hA_src is pinned
    CPU memory. See magma_setmatrix() for a synchronous version.

    @param[in]
    m           Number of rows of matrix A. m >= 0.

    @param[in]
    n           Number of columns of matrix A. n >= 0.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    hA_src      Source array of dimension (lda,n), on CPU host.

    @param[in]
    lda         Leading dimension of matrix A. lda >= m.

    @param[out]
    dB_dst      Destination array of dimension (lddb,n), on GPU device.

    @param[in]
    lddb        Leading dimension of matrix B. lddb >= m.

    @param[in]
    queue       Queue to execute in. NULL is accepted (with a warning on
                stderr) and means the NULL stream.

    @ingroup magma_setmatrix
*******************************************************************************/
extern "C" void
magma_setmatrix_async_internal(
    magma_int_t m, magma_int_t n, magma_int_t elemSize,
    void const* hA_src, magma_int_t lda,
    magma_ptr dB_dst, magma_int_t lddb,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // A NULL queue is still accepted for backwards compatibility; it selects
    // the NULL stream and prints a warning.
    cudaStream_t stream = NULL;
    if ( queue == NULL ) {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    else {
        stream = queue->cuda_stream();
    }

    // Dimensions are narrowed to int for the cuBLAS call.
    const int rows      = int(m);
    const int cols      = int(n);
    const int elem_size = int(elemSize);
    const int src_ld    = int(lda);
    const int dst_ld    = int(lddb);

    cublasStatus_t status = cublasSetMatrixAsync(
        rows, cols, elem_size, hA_src, src_ld, dB_dst, dst_ld, stream );
    check_xerror( status, func, file, line );
}
示例10: magma_cmtransposeconjugate
// Computes the conjugate transpose of sparse matrix A and stores it in *B.
//
// Three cases, selected by A's storage type and memory location:
//   1. CSR on the device: cuSPARSE csr2csc writes into B's arrays with the
//      row/column index arrays swapped, then magma_cmconjugate is applied to B.
//   2. Matrix on the CPU (any storage type): A is transferred to the device,
//      this routine is called recursively, and the result is transferred back.
//   3. Otherwise (on the device, not CSR): A is converted to CSR, this routine
//      is called recursively, and the result is converted back to A's storage
//      type.
//
// Returns info, which is 0 unless a step fails.
// NOTE(review): CHECK / CHECK_CUSPARSE are defined outside this view; they
// presumably store the error in `info` and jump to `cleanup` -- confirm.
extern "C" magma_int_t
magma_cmtransposeconjugate(
magma_c_matrix A,
magma_c_matrix *B,
magma_queue_t queue )
{
// for symmetric matrices: convert to csc using cusparse
magma_int_t info = 0;
// cuSPARSE objects start out NULL; they are only created in the CSR/device
// branch, but are destroyed unconditionally in cleanup.
cusparseHandle_t handle=NULL;
cusparseMatDescr_t descrA=NULL;
cusparseMatDescr_t descrB=NULL;
// Temporaries for the recursive branches; all are released in cleanup.
magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR};
magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR};
if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
// fill in information for B
B->storage_type = A.storage_type;
B->diagorder_type = A.diagorder_type;
B->memory_location = Magma_DEV;
B->num_rows = A.num_cols; // transposed
B->num_cols = A.num_rows; // transposed
B->nnz = A.nnz;
B->true_nnz = A.true_nnz;
// Transposition swaps the stored triangle: lower <-> upper, full stays full.
// B->fill_mode is left untouched for any other value of A.fill_mode.
if ( A.fill_mode == MagmaFull ) {
B->fill_mode = MagmaFull;
}
else if ( A.fill_mode == MagmaLower ) {
B->fill_mode = MagmaUpper;
}
else if ( A.fill_mode == MagmaUpper ) {
B->fill_mode = MagmaLower;
}
// Null the device pointers before allocating.
B->dval = NULL;
B->drow = NULL;
B->dcol = NULL;
// memory allocation
CHECK( magma_cmalloc( &B->dval, B->nnz ));
CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 ));
CHECK( magma_index_malloc( &B->dcol, B->nnz ));
// CUSPARSE context //
CHECK_CUSPARSE( cusparseCreate( &handle ));
// Bind the cuSPARSE handle to the queue's CUDA stream.
CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
// Note the argument order: B->dcol is passed before B->drow, i.e. the
// csc outputs of the conversion land in B's CSR arrays, which is what makes
// B the transpose of A.
CHECK_CUSPARSE(
cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz,
A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO) );
// Apply magma_cmconjugate to the transposed matrix (it takes B itself).
CHECK( magma_cmconjugate( B, queue ));
} else if ( A.memory_location == Magma_CPU ){
// Host matrix: move to the device, recurse there, move the result back.
CHECK( magma_cmtransfer( A, &A_d, A.memory_location, Magma_DEV, queue ));
CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue ));
CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue ));
} else {
// Other storage types: go through CSR and convert the result back.
CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ));
CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue ));
CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue ));
}
cleanup:
// Reached on every path. The cuSPARSE objects may still be NULL here when
// the CSR/device branch was not taken; the return values are ignored.
cusparseDestroyMatDescr( descrA );
cusparseDestroyMatDescr( descrB );
cusparseDestroy( handle );
magma_cmfree( &A_d, queue );
magma_cmfree( &B_d, queue );
magma_cmfree( &ACSR, queue );
magma_cmfree( &BCSR, queue );
// On failure, release whatever was stored in the output matrix.
if( info != 0 ){
magma_cmfree( B, queue );
}
return info;
}
示例11: magma_d_spmv
extern "C" magma_int_t
magma_d_spmv(
double alpha,
magma_d_matrix A,
magma_d_matrix x,
double beta,
magma_d_matrix y,
magma_queue_t queue )
{
magma_int_t info = 0;
magma_d_matrix x2={Magma_CSR};
cusparseHandle_t cusparseHandle = 0;
cusparseMatDescr_t descr = 0;
// make sure RHS is a dense matrix
if ( x.storage_type != Magma_DENSE ) {
printf("error: only dense vectors are supported for SpMV.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
goto cleanup;
}
if ( A.memory_location != x.memory_location ||
x.memory_location != y.memory_location ) {
printf("error: linear algebra objects are not located in same memory!\n");
printf("memory locations are: %d %d %d\n",
A.memory_location, x.memory_location, y.memory_location );
info = MAGMA_ERR_INVALID_PTR;
goto cleanup;
}
// DEV case
if ( A.memory_location == Magma_DEV ) {
if ( A.num_cols == x.num_rows && x.num_cols == 1 ) {
if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
|| A.storage_type == Magma_CSRL
|| A.storage_type == Magma_CSRU ) {
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO ));
cusparseDcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE,
A.num_rows, A.num_cols, A.nnz, &alpha, descr,
A.dval, A.drow, A.dcol, x.dval, &beta, y.dval );
}
else if ( A.storage_type == Magma_ELL ) {
//printf("using ELLPACKT kernel for SpMV: ");
CHECK( magma_dgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta,
y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_ELLPACKT ) {
//printf("using ELL kernel for SpMV: ");
CHECK( magma_dgeellmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta,
y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_ELLRT ) {
//printf("using ELLRT kernel for SpMV: ");
CHECK( magma_dgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval,
beta, y.dval, A.alignment, A.blocksize, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_SELLP ) {
//printf("using SELLP kernel for SpMV: ");
CHECK( magma_dgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.blocksize, A.numblocks, A.alignment,
alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_DENSE ) {
//printf("using DENSE kernel for SpMV: ");
magmablas_dgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha,
A.dval, A.num_rows, x.dval, 1, beta, y.dval,
1, queue );
//printf("done.\n");
}
else if ( A.storage_type == Magma_SPMVFUNCTION ) {
//printf("using DENSE kernel for SpMV: ");
CHECK( magma_dcustomspmv( alpha, x, beta, y, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_BCSR ) {
//printf("using CUSPARSE BCSR kernel for SpMV: ");
// CUSPARSE context //
cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW;
int mb = magma_ceildiv( A.num_rows, A.blocksize );
int nb = magma_ceildiv( A.num_cols, A.blocksize );
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
cusparseDbsrmv( cusparseHandle, dirA,
CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks,
//.........这里部分代码省略.........
示例12: magma_cpidr_strms
extern "C" magma_int_t
magma_cpidr_strms(
magma_c_matrix A, magma_c_matrix b, magma_c_matrix *x,
magma_c_solver_par *solver_par,
magma_c_preconditioner *precond_par,
magma_queue_t queue )
{
magma_int_t info = MAGMA_NOTCONVERGED;
// prepare solver feedback
solver_par->solver = Magma_PIDRMERGE;
solver_par->numiter = 0;
solver_par->spmv_count = 0;
solver_par->init_res = 0.0;
solver_par->final_res = 0.0;
solver_par->iter_res = 0.0;
solver_par->runtime = 0.0;
// constants
const magmaFloatComplex c_zero = MAGMA_C_ZERO;
const magmaFloatComplex c_one = MAGMA_C_ONE;
const magmaFloatComplex c_n_one = MAGMA_C_NEG_ONE;
// internal user options
const magma_int_t smoothing = 1; // 0 = disable, 1 = enable
const float angle = 0.7; // [0-1]
// local variables
magma_int_t iseed[4] = {0, 0, 0, 1};
magma_int_t dof;
magma_int_t s;
magma_int_t distr;
magma_int_t k, i, sk;
magma_int_t innerflag;
magma_int_t ldd;
magma_int_t q;
float residual;
float nrm;
float nrmb;
float nrmr;
float nrmt;
float rho;
magmaFloatComplex om;
magmaFloatComplex gamma;
// matrices and vectors
magma_c_matrix dxs = {Magma_CSR};
magma_c_matrix dr = {Magma_CSR}, drs = {Magma_CSR};
magma_c_matrix dP = {Magma_CSR}, dP1 = {Magma_CSR};
magma_c_matrix dG = {Magma_CSR}, dGcol = {Magma_CSR};
magma_c_matrix dU = {Magma_CSR};
magma_c_matrix dM = {Magma_CSR};
magma_c_matrix df = {Magma_CSR};
magma_c_matrix dt = {Magma_CSR}, dtt = {Magma_CSR};
magma_c_matrix dc = {Magma_CSR};
magma_c_matrix dv = {Magma_CSR};
magma_c_matrix dlu = {Magma_CSR};
magma_c_matrix dskp = {Magma_CSR};
magma_c_matrix dalpha = {Magma_CSR};
magma_c_matrix dbeta = {Magma_CSR};
magmaFloatComplex *hMdiag = NULL;
magmaFloatComplex *hskp = NULL;
magmaFloatComplex *halpha = NULL;
magmaFloatComplex *hbeta = NULL;
magmaFloatComplex *d1 = NULL, *d2 = NULL;
// queue variables
const magma_int_t nqueues = 3; // number of queues
magma_queue_t queues[nqueues];
// chronometry
real_Double_t tempo1, tempo2;
// create additional queues
queues[0] = queue;
for ( q = 1; q < nqueues; q++ ) {
magma_queue_create( queue->device(), &(queues[q]) );
}
// initial s space
// TODO: add option for 's' (shadow space number)
// Hack: uses '--restart' option as the shadow space number.
// This is not a good idea because the default value of restart option is used to detect
// if the user provided a custom restart. This means that if the default restart value
// is changed then the code will think it was the user (unless the default value is
// also updated in the 'if' statement below.
s = 1;
if ( solver_par->restart != 50 ) {
if ( solver_par->restart > A.num_cols ) {
s = A.num_cols;
} else {
s = solver_par->restart;
}
}
solver_par->restart = s;
// set max iterations
solver_par->maxiter = min( 2 * A.num_cols, solver_par->maxiter );
// check if matrix A is square
//.........这里部分代码省略.........