本文整理汇总了C++中matrix_type::values方法的典型用法代码示例。如果您正苦于以下问题:C++ matrix_type::values方法的具体用法?C++ matrix_type::values怎么用?C++ matrix_type::values使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类matrix_type
的用法示例。
在下文中一共展示了matrix_type::values方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: operator
__device__
inline void operator() ( const size_type iRow ) const {
const size_type iEntryBegin = m_A.graph.row_map[iRow];
const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
for (size_type j=0; j<m_num_col; j++) {
size_type iCol = m_col_indices[j];
scalar_type sum = 0.0;
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
sum += m_A.values(iEntry) * m_x( m_A.graph.entries(iEntry), iCol );
}
m_y( iRow, iCol ) = sum;
}
}
示例2: operator
__device__
void operator()(void) const
{
const size_type WarpSize = Kokkos::Impl::CudaTraits::WarpSize;
// Number of bases in the stochastic system:
const size_type dim = m_A.block.dimension();
// Number of Cijk tiles
const size_type n_tile = m_A.block.num_tiles();
const size_type tile_size = m_A.block.tile_size();
const size_type tile_dim = n_tile == 1 ? dim : tile_size;
//const size_type tile_dim = tile_size;
VectorScalar * const sh_x_k =
kokkos_impl_cuda_shared_memory<VectorScalar>();
VectorScalar * const sh_x_j =
n_tile == 1 ? sh_x_k : sh_x_k + m_block_size*tile_dim;
VectorScalar * const sh_A_k =
sh_x_j + m_block_size*tile_dim;
VectorScalar * const sh_A_j =
n_tile == 1 ? sh_A_k : sh_A_k + m_block_size*tile_dim;
VectorScalar * const sh_y = sh_A_j + m_block_size*tile_dim;
volatile VectorScalar * const sh_t = sh_y + tile_dim;
const size_type nid = blockDim.x * blockDim.y;
const size_type tid = threadIdx.x + blockDim.x * threadIdx.y;
// blockIdx.x == row in the deterministic (finite element) system
const size_type iBlockEntryBeg = m_A.graph.row_map[ blockIdx.x ];
const size_type iBlockEntryEnd = m_A.graph.row_map[ blockIdx.x + 1 ];
size_type numBlock = (iBlockEntryEnd-iBlockEntryBeg) / m_block_size;
const size_type remBlock = (iBlockEntryEnd-iBlockEntryBeg) % m_block_size;
if (remBlock > 0) ++numBlock;
// Zero y
for (size_type i=tid; i<dim; i+=nid) {
m_y(i,blockIdx.x) = 0.0;
}
// Loop over Cijk tiles
for (size_type tile = 0; tile<n_tile; ++tile) {
const size_type i_offset = m_A.block.offset(tile, 0);
const size_type j_offset = m_A.block.offset(tile, 1);
const size_type k_offset = m_A.block.offset(tile, 2);
const size_type i_range = m_A.block.range(tile, 0);
const size_type j_range = m_A.block.range(tile, 1);
const size_type k_range = m_A.block.range(tile, 2);
const size_type n_row = m_A.block.num_rows(tile);
// Zero y
for (size_type i=tid; i<i_range; i+=nid) {
sh_y[i] = 0.0;
}
// Loop over finite element column blocks.
size_type iBlockEntry = iBlockEntryBeg;
for (size_type block=0; block<numBlock;
++block, iBlockEntry+=m_block_size) {
const size_type block_size =
(block == numBlock-1 && remBlock > 0) ? remBlock : m_block_size;
// Wait for X and A to be used in the previous iteration
// before reading new values.
__syncthreads();
// Coalesced read blocks of X and A into shared memory
for (size_type col=0; col<block_size; ++col) {
const size_type iBlockColumn =
m_A.graph.entries( iBlockEntry + col );
const VectorScalar * const x_k = & m_x( k_offset , iBlockColumn );
const VectorScalar * const x_j = & m_x( j_offset , iBlockColumn );
const MatrixScalar * const A_k = & m_A.values( k_offset , iBlockEntry + col );
const MatrixScalar * const A_j = & m_A.values( j_offset , iBlockEntry + col );
for (size_type j=tid; j<j_range; j+=nid) {
sh_x_j[col+j*m_block_size] = x_j[j];
sh_A_j[col+j*m_block_size] = A_j[j];
}
if (n_tile > 1) {
for (size_type k=tid; k<k_range; k+=nid) {
sh_x_k[col+k*m_block_size] = x_k[k];
sh_A_k[col+k*m_block_size] = A_k[k];
}
}
}
__syncthreads(); // wait for X and A to be read
// Loop over stochastic rows in this tile
for (size_type i=threadIdx.y; i<i_range; i+=blockDim.y) {
VectorScalar s = 0;
// Product tensor entries which this warp will iterate:
const size_type lBeg = m_A.block.entry_begin(tile, i);
const size_type lEnd = m_A.block.entry_end(tile, i);
//.........这里部分代码省略.........
示例3: operator
__device__
void operator()(void) const
{
// Number of bases in the stochastic system:
const size_type dim = m_A.block.dimension();
const size_type max_tile_size = m_A.block.max_jk_tile_size();
volatile VectorScalar * const sh_x_k =
kokkos_impl_cuda_shared_memory<VectorScalar>();
volatile VectorScalar * const sh_x_j = sh_x_k+m_block_size*max_tile_size;
volatile VectorScalar * const sh_A_k = sh_x_j+m_block_size*max_tile_size;
volatile VectorScalar * const sh_A_j = sh_A_k+m_block_size*max_tile_size;
volatile VectorScalar * const sh_y = sh_A_j+m_block_size*max_tile_size;
const size_type nid = blockDim.x * blockDim.y;
const size_type tid = threadIdx.x + blockDim.x * threadIdx.y;
// blockIdx.x == row in the deterministic (finite element) system
const size_type iBlockEntryBeg = m_A.graph.row_map[ blockIdx.x ];
const size_type iBlockEntryEnd = m_A.graph.row_map[ blockIdx.x + 1 ];
size_type numBlock = (iBlockEntryEnd-iBlockEntryBeg) / m_block_size;
const size_type remBlock = (iBlockEntryEnd-iBlockEntryBeg) % m_block_size;
if (remBlock > 0) ++numBlock;
// Loop over i tiles
const size_type n_i_tile = m_A.block.num_i_tiles();
for (size_type i_tile = 0; i_tile<n_i_tile; ++i_tile) {
const size_type i_begin = m_A.block.i_begin(i_tile);
const size_type i_size = m_A.block.i_size(i_tile);
// Zero y
for (size_type i=tid; i<i_size; i+=nid) {
sh_y[i] = 0.0;
}
// Loop over finite element column blocks.
size_type iBlockEntry = iBlockEntryBeg;
for (size_type block=0; block<numBlock; ++block,
iBlockEntry+=m_block_size) {
const size_type block_size =
(block == numBlock-1 && remBlock > 0) ? remBlock : m_block_size;
// Loop over j tiles
const size_type n_j_tile = m_A.block.num_j_tiles(i_tile);
for (size_type j_tile = 0; j_tile<n_j_tile; ++j_tile) {
const size_type j_begin = m_A.block.j_begin(i_tile, j_tile);
const size_type j_size = m_A.block.j_size(i_tile, j_tile);
// Wait for X and A to be used in the previous iteration
// before reading new values.
__syncthreads();
// Coalesced read j-blocks of X and A into shared memory
for (size_type col=0; col<block_size; ++col) {
const size_type iBlockColumn =
m_A.graph.entries(iBlockEntry + col);
const VectorScalar * const x_j =
&m_x(j_begin, iBlockColumn);
const MatrixScalar * const A_j =
&m_A.values(j_begin, iBlockEntry + col);
for (size_type j=tid; j<j_size; j+=nid) {
sh_x_j[col+j*m_block_size] = x_j[j];
sh_A_j[col+j*m_block_size] = A_j[j];
}
}
// Loop over k tiles
const size_type n_k_tile = m_A.block.num_k_tiles(i_tile, j_tile);
for (size_type k_tile = 0; k_tile<n_k_tile; ++k_tile) {
const size_type k_begin =
m_A.block.k_begin(i_tile, j_tile, k_tile);
const size_type k_size =
m_A.block.k_size(i_tile, j_tile, k_tile);
// Wait for X and A to be used in the previous iteration
// before reading new values.
__syncthreads();
// Coalesced read j-blocks of X and A into shared memory
for (size_type col=0; col<block_size; ++col) {
const size_type iBlockColumn =
m_A.graph.entries(iBlockEntry + col);
const VectorScalar * const x_k =
&m_x(k_begin, iBlockColumn);
const MatrixScalar * const A_k =
&m_A.values(k_begin, iBlockEntry + col);
for (size_type k=tid; k<k_size; k+=nid) {
sh_x_k[col+k*m_block_size] = x_k[k];
sh_A_k[col+k*m_block_size] = A_k[k];
}
}
__syncthreads(); // wait for X and A to be read
// Loop over stochastic rows in this tile
for (size_type i=threadIdx.y; i<i_size; i+=blockDim.y) {
VectorScalar s = 0;
// Product tensor entries which this warp will iterate:
const size_type lBeg =
//.........这里部分代码省略.........