This page compiles typical usage examples of the C++ method magma_queue_t::cuda_stream. If you are unsure how to use magma_queue_t::cuda_stream in C++, or what it is used for, the curated method examples below may help. You can also explore further usage examples of its enclosing class, magma_queue_t.
The following presents 10 code examples of the magma_queue_t::cuda_stream method, sorted by popularity by default.
Example 1: magma_copyvector
/***************************************************************************//**
@fn magma_copyvector_async( n, elemSize, dx_src, incx, dy_dst, incy, queue )
Copy vector dx_src on GPU device to dy_dst on GPU device.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
With CUDA unified addressing, dx and dy can be on different GPUs.
This version is asynchronous: it may return before the transfer finishes.
See magma_copyvector() for a synchronous version.
@param[in]
n Number of elements in vector.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dx_src Source array of dimension (1 + (n-1)*incx), on GPU device.
@param[in]
incx Increment between elements of dx_src. incx > 0.
@param[out]
dy_dst Destination array of dimension (1 + (n-1)*incy), on GPU device.
@param[in]
incy Increment between elements of dy_dst. incy > 0.
@param[in]
queue Queue to execute in.
@ingroup magma_copyvector
*******************************************************************************/
extern "C" void
magma_copyvector_async_internal(
magma_int_t n, magma_int_t elemSize,
magma_const_ptr dx_src, magma_int_t incx,
magma_ptr dy_dst, magma_int_t incy,
magma_queue_t queue,
const char* func, const char* file, int line )
{
// for backwards compatability, accepts NULL queue to mean NULL stream.
cudaStream_t stream = NULL;
if ( queue != NULL ) {
stream = queue->cuda_stream();
}
else {
fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
}
if ( incx == 1 && incy == 1 ) {
cudaError_t status;
status = cudaMemcpyAsync(
dy_dst,
dx_src,
int(n*elemSize), cudaMemcpyDeviceToDevice, stream );
check_xerror( status, func, file, line );
}
else {
magma_copymatrix_async_internal(
1, n, elemSize, dx_src, incx, dy_dst, incy, queue, func, file, line );
}
}
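The trailing func/file/line parameters show how this _internal routine is meant to be reached: the public name forwards to it so that check_xerror can report the original call site. A minimal sketch of that wrapper pattern, assuming the macro style used in MAGMA's public headers (the exact text there may differ):

#define magma_copyvector_async( n, elemSize, dx_src, incx, dy_dst, incy, queue ) \
        magma_copyvector_async_internal( n, elemSize, dx_src, incx, dy_dst, incy, \
                                         queue, __func__, __FILE__, __LINE__ )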
Example 2: magma_copymatrix_async
/***************************************************************************//**
@fn magma_copymatrix( m, n, elemSize, dA_src, ldda, dB_dst, lddb, queue )
Copy all or part of matrix dA_src on GPU device to dB_dst on GPU device.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
With CUDA unified addressing, dA and dB can be on different GPUs.
This version synchronizes the queue after the transfer.
See magma_copymatrix_async() for an asynchronous version.
@param[in]
m Number of rows of matrix A. m >= 0.
@param[in]
n Number of columns of matrix A. n >= 0.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dA_src Source array of dimension (ldda,n).
@param[in]
ldda Leading dimension of matrix A. ldda >= m.
@param[out]
dB_dst Destination array of dimension (lddb,n), on GPU device.
@param[in]
lddb Leading dimension of matrix B. lddb >= m.
@param[in]
queue Queue to execute in.
@ingroup magma_copymatrix
*******************************************************************************/
extern "C" void
magma_copymatrix_q_internal(
magma_int_t m, magma_int_t n, magma_int_t elemSize,
magma_const_ptr dA_src, magma_int_t ldda,
magma_ptr dB_dst, magma_int_t lddb,
magma_queue_t queue,
const char* func, const char* file, int line )
{
assert( queue != NULL );
cudaError_t status;
status = cudaMemcpy2DAsync(
dB_dst, int(lddb*elemSize),
dA_src, int(ldda*elemSize),
int(m*elemSize), int(n), cudaMemcpyDeviceToDevice, queue->cuda_stream() );
cudaStreamSynchronize( queue->cuda_stream() );
check_xerror( status, func, file, line );
}
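A usage sketch of this routine through the type-safe double-precision wrapper magma_dcopymatrix, which supplies elemSize = sizeof(double) and the func/file/line arguments; it assumes queue was created earlier with magma_queue_create:

magma_int_t m = 100, n = 50, ldda = 128, lddb = 128;
magmaDouble_ptr dA = NULL, dB = NULL;
magma_dmalloc( &dA, ldda*n );    // device buffer holding A
magma_dmalloc( &dB, lddb*n );    // device buffer receiving the copy
// ... fill dA ...
magma_dcopymatrix( m, n, dA, ldda, dB, lddb, queue );    // returns only after the copy is done
magma_free( dA );
magma_free( dB );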
Example 3: magma_getmatrix_async
/***************************************************************************//**
@fn magma_getmatrix( m, n, elemSize, dA_src, ldda, hB_dst, ldb, queue )
Copy all or part of matrix dA_src on GPU device to hB_dst on CPU host.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
This version synchronizes the queue after the transfer.
See magma_getmatrix_async() for an asynchronous version.
@param[in]
m Number of rows of matrix A. m >= 0.
@param[in]
n Number of columns of matrix A. n >= 0.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dA_src Source array of dimension (ldda,n), on GPU device.
@param[in]
ldda Leading dimension of matrix A. ldda >= m.
@param[out]
hB_dst Destination array of dimension (ldb,n), on CPU host.
@param[in]
ldb Leading dimension of matrix B. ldb >= m.
@param[in]
queue Queue to execute in.
@ingroup magma_getmatrix
*******************************************************************************/
extern "C" void
magma_getmatrix_q_internal(
magma_int_t m, magma_int_t n, magma_int_t elemSize,
magma_const_ptr dA_src, magma_int_t ldda,
void* hB_dst, magma_int_t ldb,
magma_queue_t queue,
const char* func, const char* file, int line )
{
assert( queue != NULL );
cublasStatus_t status;
status = cublasGetMatrixAsync(
int(m), int(n), int(elemSize),
dA_src, int(ldda),
hB_dst, int(ldb), queue->cuda_stream() );
cudaStreamSynchronize( queue->cuda_stream() );
check_xerror( status, func, file, line );
}
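A usage sketch via the type-safe wrapper magma_dgetmatrix, assuming dA, ldda, m, n, ldb >= m, and queue are set up as in the previous example; because this version synchronizes, an ordinary pageable host buffer is safe to read as soon as the call returns:

std::vector<double> hB( ldb*n );    // pageable host memory is fine for the blocking version
magma_dgetmatrix( m, n, dA, ldda, hB.data(), ldb, queue );
printf( "hB(0,0) = %f\n", hB[0] );  // safe: the transfer has already finished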
Example 4: assert
// TODO compare performance with cublasZcopy BLAS function.
// But this implementation can handle any element size, not just [sdcz] precisions.
extern "C" void
magma_copyvector_q_internal(
magma_int_t n, magma_int_t elemSize,
magma_const_ptr dx_src, magma_int_t incx,
magma_ptr dy_dst, magma_int_t incy,
magma_queue_t queue,
const char* func, const char* file, int line )
{
assert( queue != NULL );
if ( incx == 1 && incy == 1 ) {
cudaError_t status;
status = cudaMemcpyAsync(
dy_dst,
dx_src,
int(n*elemSize), cudaMemcpyDeviceToDevice, queue->cuda_stream() );
cudaStreamSynchronize( queue->cuda_stream() );
check_xerror( status, func, file, line );
}
else {
magma_copymatrix_q_internal(
1, n, elemSize, dx_src, incx, dy_dst, incy, queue, func, file, line );
}
}
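To see why the strided fallback works, note that a vector of n elements with stride incx is exactly a 1 x n matrix with leading dimension incx, so magma_copymatrix_q_internal(1, n, ...) boils down to one pitched 2-D copy. An illustration of that equivalence (not MAGMA source):

static cudaError_t strided_copy_sketch(
    void* dy_dst, size_t incy,
    const void* dx_src, size_t incx,
    size_t n, size_t elemSize, cudaStream_t stream )
{
    return cudaMemcpy2DAsync(
        dy_dst, incy * elemSize,    // destination pitch in bytes
        dx_src, incx * elemSize,    // source pitch in bytes
        elemSize,                   // width: one element per row
        n,                          // height: n rows, one per vector element
        cudaMemcpyDeviceToDevice, stream );
}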
Example 5: magma_getvector_async
/***************************************************************************//**
@fn magma_getvector( n, elemSize, dx_src, incx, hy_dst, incy, queue )
Copy vector dx_src on GPU device to hy_dst on CPU host.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
This version synchronizes the queue after the transfer.
See magma_getvector_async() for an asynchronous version.
@param[in]
n Number of elements in vector.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dx_src Source array of dimension (1 + (n-1)*incx), on GPU device.
@param[in]
incx Increment between elements of dx_src. incx > 0.
@param[out]
hy_dst Destination array of dimension (1 + (n-1)*incy), on CPU host.
@param[in]
incy Increment between elements of hy_dst. incy > 0.
@param[in]
queue Queue to execute in.
@ingroup magma_getvector
*******************************************************************************/
extern "C" void
magma_getvector_q_internal(
magma_int_t n, magma_int_t elemSize,
magma_const_ptr dx_src, magma_int_t incx,
void* hy_dst, magma_int_t incy,
magma_queue_t queue,
const char* func, const char* file, int line )
{
cublasStatus_t status;
status = cublasGetVectorAsync(
int(n), int(elemSize),
dx_src, int(incx),
hy_dst, int(incy), queue->cuda_stream() );
cudaStreamSynchronize( queue->cuda_stream() );
check_xerror( status, func, file, line );
}
Example 6: magma_getmatrix
/***************************************************************************//**
@fn magma_getmatrix_async( m, n, elemSize, dA_src, ldda, hB_dst, ldb, queue )
Copy all or part of matrix dA_src on GPU device to hB_dst on CPU host.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
This version is asynchronous: it may return before the transfer finishes,
if hB_dst is pinned CPU memory.
See magma_getmatrix() for a synchronous version.
@param[in]
m Number of rows of matrix A. m >= 0.
@param[in]
n Number of columns of matrix A. n >= 0.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dA_src Source array of dimension (ldda,n), on GPU device.
@param[in]
ldda Leading dimension of matrix A. ldda >= m.
@param[out]
hB_dst Destination array of dimension (ldb,n), on CPU host.
@param[in]
ldb Leading dimension of matrix B. ldb >= m.
@param[in]
queue Queue to execute in.
@ingroup magma_getmatrix
*******************************************************************************/
extern "C" void
magma_getmatrix_async_internal(
magma_int_t m, magma_int_t n, magma_int_t elemSize,
magma_const_ptr dA_src, magma_int_t ldda,
void* hB_dst, magma_int_t ldb,
magma_queue_t queue,
const char* func, const char* file, int line )
{
cudaStream_t stream = NULL;
if ( queue != NULL ) {
stream = queue->cuda_stream();
}
else {
fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
}
cublasStatus_t status;
status = cublasGetMatrixAsync(
int(m), int(n), int(elemSize),
dA_src, int(ldda),
hB_dst, int(ldb), stream );
check_xerror( status, func, file, line );
}
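As the documentation notes, the call is only truly asynchronous when hB_dst is pinned host memory; with pageable memory CUDA stages the copy and it becomes effectively blocking. A usage sketch under that assumption, with dA, ldda, ldb, m, n, and queue prepared beforehand:

double* hB = NULL;
magma_dmalloc_pinned( &hB, ldb*n );    // page-locked host memory enables real overlap
magma_dgetmatrix_async( m, n, dA, ldda, hB, ldb, queue );
// ... independent CPU work can overlap with the transfer here ...
magma_queue_sync( queue );             // hB is valid only after this returns
magma_free_pinned( hB );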
Example 7: magma_getvector
/***************************************************************************//**
@fn magma_getvector_async( n, elemSize, dx_src, incx, hy_dst, incy, queue )
Copy vector dx_src on GPU device to hy_dst on CPU host.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
This version is asynchronous: it may return before the transfer finishes,
if hy_dst is pinned CPU memory.
See magma_getvector() for a synchronous version.
@param[in]
n Number of elements in vector.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
dx_src Source array of dimension (1 + (n-1)*incx), on GPU device.
@param[in]
incx Increment between elements of dx_src. incx > 0.
@param[out]
hy_dst Destination array of dimension (1 + (n-1)*incy), on CPU host.
@param[in]
incy Increment between elements of hy_dst. incy > 0.
@param[in]
queue Queue to execute in.
@ingroup magma_getvector
*******************************************************************************/
extern "C" void
magma_getvector_async_internal(
magma_int_t n, magma_int_t elemSize,
magma_const_ptr dx_src, magma_int_t incx,
void* hy_dst, magma_int_t incy,
magma_queue_t queue,
const char* func, const char* file, int line )
{
// for backwards compatability, accepts NULL queue to mean NULL stream.
cudaStream_t stream = NULL;
if ( queue != NULL ) {
stream = queue->cuda_stream();
}
else {
fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
}
cublasStatus_t status;
status = cublasGetVectorAsync(
int(n), int(elemSize),
dx_src, int(incx),
hy_dst, int(incy), stream );
check_xerror( status, func, file, line );
}
Example 8: magma_setmatrix
/***************************************************************************//**
@fn magma_setmatrix_async( m, n, elemSize, hA_src, lda, dB_dst, lddb, queue )
Copy all or part of matrix hA_src on CPU host to dB_dst on GPU device.
Elements may be arbitrary size.
Type-safe versions set elemSize appropriately.
This version is asynchronous: it may return before the transfer finishes,
if hA_src is pinned CPU memory.
See magma_setmatrix() for a synchronous version.
@param[in]
m Number of rows of matrix A. m >= 0.
@param[in]
n Number of columns of matrix A. n >= 0.
@param[in]
elemSize Size of each element, e.g., sizeof(double).
@param[in]
hA_src Source array of dimension (lda,n), on CPU host.
@param[in]
lda Leading dimension of matrix A. lda >= m.
@param[out]
dB_dst Destination array of dimension (lddb,n), on GPU device.
@param[in]
lddb Leading dimension of matrix B. lddb >= m.
@param[in]
queue Queue to execute in.
@ingroup magma_setmatrix
*******************************************************************************/
extern "C" void
magma_setmatrix_async_internal(
magma_int_t m, magma_int_t n, magma_int_t elemSize,
void const* hA_src, magma_int_t lda,
magma_ptr dB_dst, magma_int_t lddb,
magma_queue_t queue,
const char* func, const char* file, int line )
{
// for backwards compatability, accepts NULL queue to mean NULL stream.
cudaStream_t stream = NULL;
if ( queue != NULL ) {
stream = queue->cuda_stream();
}
else {
fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
}
cublasStatus_t status;
status = cublasSetMatrixAsync(
int(m), int(n), int(elemSize),
hA_src, int(lda),
dB_dst, int(lddb), stream );
check_xerror( status, func, file, line );
}
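Because operations enqueued on a single queue execute in order, the async set/get routines compose into an upload-compute-download pipeline with one final synchronization. A sketch, assuming pinned host buffers hA (m x n, lda), hB (n x k, ldb), hC (m x k, ldc) and matching device buffers dA, dB, dC were allocated earlier:

magma_dsetmatrix_async( m, n, hA, lda, dA, ldda, queue );    // upload A
magma_dsetmatrix_async( n, k, hB, ldb, dB, lddb, queue );    // upload B
magma_dgemm( MagmaNoTrans, MagmaNoTrans, m, k, n,
             1.0, dA, ldda, dB, lddb,
             0.0, dC, lddc, queue );                         // C = A*B, ordered after the uploads
magma_dgetmatrix_async( m, k, dC, lddc, hC, ldc, queue );    // download C
magma_queue_sync( queue );                                   // hC is valid from here on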
Example 9: if
extern "C" magma_int_t
magma_d_spmv(
double alpha,
magma_d_matrix A,
magma_d_matrix x,
double beta,
magma_d_matrix y,
magma_queue_t queue )
{
magma_int_t info = 0;
magma_d_matrix x2={Magma_CSR};
cusparseHandle_t cusparseHandle = 0;
cusparseMatDescr_t descr = 0;
// make sure RHS is a dense matrix
if ( x.storage_type != Magma_DENSE ) {
printf("error: only dense vectors are supported for SpMV.\n");
info = MAGMA_ERR_NOT_SUPPORTED;
goto cleanup;
}
if ( A.memory_location != x.memory_location ||
x.memory_location != y.memory_location ) {
printf("error: linear algebra objects are not located in same memory!\n");
printf("memory locations are: %d %d %d\n",
A.memory_location, x.memory_location, y.memory_location );
info = MAGMA_ERR_INVALID_PTR;
goto cleanup;
}
// DEV case
if ( A.memory_location == Magma_DEV ) {
if ( A.num_cols == x.num_rows && x.num_cols == 1 ) {
if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
|| A.storage_type == Magma_CSRL
|| A.storage_type == Magma_CSRU ) {
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO ));
cusparseDcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE,
A.num_rows, A.num_cols, A.nnz, &alpha, descr,
A.dval, A.drow, A.dcol, x.dval, &beta, y.dval );
}
else if ( A.storage_type == Magma_ELL ) {
//printf("using ELLPACKT kernel for SpMV: ");
CHECK( magma_dgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta,
y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_ELLPACKT ) {
//printf("using ELL kernel for SpMV: ");
CHECK( magma_dgeellmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta,
y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_ELLRT ) {
//printf("using ELLRT kernel for SpMV: ");
CHECK( magma_dgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval,
beta, y.dval, A.alignment, A.blocksize, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_SELLP ) {
//printf("using SELLP kernel for SpMV: ");
CHECK( magma_dgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols,
A.blocksize, A.numblocks, A.alignment,
alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_DENSE ) {
//printf("using DENSE kernel for SpMV: ");
magmablas_dgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha,
A.dval, A.num_rows, x.dval, 1, beta, y.dval,
1, queue );
//printf("done.\n");
}
else if ( A.storage_type == Magma_SPMVFUNCTION ) {
//printf("using DENSE kernel for SpMV: ");
CHECK( magma_dcustomspmv( alpha, x, beta, y, queue ));
//printf("done.\n");
}
else if ( A.storage_type == Magma_BCSR ) {
//printf("using CUSPARSE BCSR kernel for SpMV: ");
// CUSPARSE context //
cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW;
int mb = magma_ceildiv( A.num_rows, A.blocksize );
int nb = magma_ceildiv( A.num_cols, A.blocksize );
CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descr ));
cusparseDbsrmv( cusparseHandle, dirA,
CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks,
//.........这里部分代码省略.........
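A hypothetical usage sketch of the routine above, computing y = alpha*A*x + beta*y; it assumes A, x, and y are magma_d_matrix handles already built and moved to the device (e.g., with magma_dmtransfer), with x and y stored as dense vectors:

double alpha = 1.0, beta = 0.0;
magma_int_t info = magma_d_spmv( alpha, A, x, beta, y, queue );    // dispatches on A.storage_type
magma_queue_sync( queue );    // make sure the SpMV has completed on the queue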
Example 10: if
extern "C" magma_int_t
magma_cmtransposeconjugate(
magma_c_matrix A,
magma_c_matrix *B,
magma_queue_t queue )
{
// for symmetric matrices: convert to csc using cusparse
magma_int_t info = 0;
cusparseHandle_t handle=NULL;
cusparseMatDescr_t descrA=NULL;
cusparseMatDescr_t descrB=NULL;
magma_c_matrix ACSR={Magma_CSR}, BCSR={Magma_CSR};
magma_c_matrix A_d={Magma_CSR}, B_d={Magma_CSR};
if( A.storage_type == Magma_CSR && A.memory_location == Magma_DEV ) {
// fill in information for B
B->storage_type = A.storage_type;
B->diagorder_type = A.diagorder_type;
B->memory_location = Magma_DEV;
B->num_rows = A.num_cols; // transposed
B->num_cols = A.num_rows; // transposed
B->nnz = A.nnz;
B->true_nnz = A.true_nnz;
if ( A.fill_mode == MagmaFull ) {
B->fill_mode = MagmaFull;
}
else if ( A.fill_mode == MagmaLower ) {
B->fill_mode = MagmaUpper;
}
else if ( A.fill_mode == MagmaUpper ) {
B->fill_mode = MagmaLower;
}
B->dval = NULL;
B->drow = NULL;
B->dcol = NULL;
// memory allocation
CHECK( magma_cmalloc( &B->dval, B->nnz ));
CHECK( magma_index_malloc( &B->drow, B->num_rows + 1 ));
CHECK( magma_index_malloc( &B->dcol, B->nnz ));
// CUSPARSE context //
CHECK_CUSPARSE( cusparseCreate( &handle ));
CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
CHECK_CUSPARSE(
cusparseCcsr2csc( handle, A.num_rows, A.num_cols, A.nnz,
A.dval, A.drow, A.dcol, B->dval, B->dcol, B->drow,
CUSPARSE_ACTION_NUMERIC,
CUSPARSE_INDEX_BASE_ZERO) );
CHECK( magma_cmconjugate( B, queue ));
} else if ( A.memory_location == Magma_CPU ){
CHECK( magma_cmtransfer( A, &A_d, A.memory_location, Magma_DEV, queue ));
CHECK( magma_cmtransposeconjugate( A_d, &B_d, queue ));
CHECK( magma_cmtransfer( B_d, B, Magma_DEV, A.memory_location, queue ));
} else {
CHECK( magma_cmconvert( A, &ACSR, A.storage_type, Magma_CSR, queue ));
CHECK( magma_cmtransposeconjugate( ACSR, &BCSR, queue ));
CHECK( magma_cmconvert( BCSR, B, Magma_CSR, A.storage_type, queue ));
}
cleanup:
cusparseDestroyMatDescr( descrA );
cusparseDestroyMatDescr( descrB );
cusparseDestroy( handle );
magma_cmfree( &A_d, queue );
magma_cmfree( &B_d, queue );
magma_cmfree( &ACSR, queue );
magma_cmfree( &BCSR, queue );
if( info != 0 ){
magma_cmfree( B, queue );
}
return info;
}
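A hypothetical usage sketch: form B = A^H (the conjugate transpose) of a complex sparse matrix; as the branches above show, the routine allocates B itself and internally converts CPU-resident or non-CSR inputs:

magma_c_matrix B = {Magma_CSR};
magma_int_t info = magma_cmtransposeconjugate( A, &B, queue );    // B = conj(A^T)
// ... use B ...
magma_cmfree( &B, queue );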