This article collects typical usage examples of the C++ method Kokkos::View::dimension_1. If you are wondering how View::dimension_1 is used in practice, the curated examples below may help; you can also explore further usage examples of the containing class Kokkos::View.
Nine code examples of View::dimension_1 are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ examples.
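Before the examples, here is a minimal, self-contained sketch of what dimension_1 reports (the view name and sizes are illustrative only). Note that dimension_1() is the legacy Kokkos::View spelling; in Kokkos 3.x and later it has been replaced by extent(1).

#include <Kokkos_Core.hpp>
#include <cstdio>

int main( int argc, char* argv[] ) {
  Kokkos::initialize( argc, argv );
  {
    // A 2D view with 10 rows and 4 columns.
    Kokkos::View<double**> a( "a", 10, 4 );
    // dimension_0() / dimension_1() return the extents of the first
    // and second dimensions (10 and 4 here).
    printf( "rows = %d, cols = %d\n",
            static_cast<int>( a.dimension_0() ),
            static_cast<int>( a.dimension_1() ) );
  }
  Kokkos::finalize();
  return 0;
}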
Example 1: GEMM
static void GEMM (Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha,
                  Kokkos::View<Scalar***, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace> A,
                  Kokkos::View<Scalar***, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace> B,
                  Scalar beta,
                  Kokkos::View<Scalar***, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace> C) {
  // C is batch x m x n; A is batch x m x k (or batch x k x m if transposed).
  const int m = static_cast<int> (C.dimension_1 ()),
            n = static_cast<int> (C.dimension_2 ()),
            k = static_cast<int> (transA == Teuchos::NO_TRANS ? A.dimension_2 () : A.dimension_1 ());
  // printf("m:%d,n:%d,k:%d",m,n,k);
  // Launch one batched-BLAS functor instance per matrix in the batch.
  Kokkos::parallel_for (C.dimension_0 (), blasOpenMPBatchLeft<Scalar> (A, B, C, m, n, k, transA, transB, alpha, beta));
}
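A hypothetical call of the wrapper above, to show how the extents map: the first view extent is the batch index, so dimension_1/dimension_2 give each matrix's row and column counts. The shapes and scalar type are illustrative, and blasOpenMPBatchLeft is assumed to be defined in the enclosing code.

// Batch of 1000 products C = alpha*A*B + beta*C, with A 4x3 and B 3x2:
Kokkos::View<double***, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace>
  A( "A", 1000, 4, 3 ), B( "B", 1000, 3, 2 ), C( "C", 1000, 4, 2 );
GEMM( Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, B, 0.0, C );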
Example 2: operator()
KOKKOS_INLINE_FUNCTION
void operator() (int i) const {
  double tmp = 0.0;
  // dimension_1() is the number of indices gathered per row of idx.
  for (int j = 0; j < (int) idx.dimension_1(); j++) {
    const double val = src(idx(i,j));
    tmp += val*val + 0.5*(idx.dimension_0()*val - idx.dimension_1()*val);
  }
  dest(i) += tmp;
}
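The operator() above is a member of a gather-style functor. For context, here is a minimal self-contained sketch of such a functor and its dispatch; the functor name Gather, the view names, and the sizes are all assumptions, not from the original source.

#include <Kokkos_Core.hpp>

struct Gather {
  Kokkos::View<const int**>   idx;   // idx(i,j): j-th source index for entry i
  Kokkos::View<const double*> src;
  Kokkos::View<double*>       dest;

  KOKKOS_INLINE_FUNCTION
  void operator()( const int i ) const {
    double tmp = 0.0;
    for ( int j = 0; j < (int) idx.dimension_1(); j++ )
      tmp += src( idx(i,j) );
    dest(i) += tmp;
  }
};

int main( int argc, char* argv[] ) {
  Kokkos::initialize( argc, argv );
  {
    Kokkos::View<int**>   idx( "idx", 100, 4 );
    Kokkos::View<double*> src( "src", 400 );
    Kokkos::View<double*> dest( "dest", 100 );
    // ... fill idx and src here ...
    // One work item per destination entry.
    Kokkos::parallel_for( dest.dimension_0(), Gather{ idx, src, dest } );
    Kokkos::fence();
  }
  Kokkos::finalize();
  return 0;
}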
Example 3: LambdaInitialize
TEST_F( KokkosThreads, LambdaInitialize )
{
  Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE>
    a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION );

  Kokkos::parallel_for<KOKKOS_THREAD_DEVICE>(
    a.dimension_0() ,
    [=](size_t i) {
      for (size_t x = 0; x < a.dimension_1(); ++x) {
        a(i,x) = i;
      }
    }
  );

  Kokkos::View<const unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> b = a;

  int num_errors = 0;
  // Cannot portably call a GTEST macro in parallel, so count the
  // errors in the reduction and test that the count is zero.
  Kokkos::parallel_reduce<KOKKOS_THREAD_DEVICE, int /*reduction value type*/>(
    b.dimension_0() ,
    [](int & local_errors)                                    // init lambda
      { local_errors = 0; } ,
    [=](size_t i, int & local_errors) {                       // operator() lambda
      for (size_t x = 0; x < b.dimension_1(); ++x)
        local_errors += i == b(i,x) ? 0 : 1;
    } ,
    [](volatile int & dst_err, volatile int const& src_err)   // join lambda
      { dst_err += src_err; } ,
    num_errors                                                // where to store the result
  );

  EXPECT_EQ( 0, num_errors );
}
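The init/join lambda overload above comes from an older Kokkos test harness. For comparison, here is a sketch of the same initialize-then-verify pattern written against the standard Kokkos dispatch interface, using a plain sum reduction (view name and sizes are illustrative; dimension_0/dimension_1 are again the legacy spellings of extent(0)/extent(1)):

#include <Kokkos_Core.hpp>
#include <cstdio>

int main( int argc, char* argv[] ) {
  Kokkos::initialize( argc, argv );
  {
    Kokkos::View<unsigned**> a( Kokkos::ViewAllocateWithoutInitializing("a"), 100, 8 );
    // Fill every column of row i with the row index i.
    Kokkos::parallel_for( a.dimension_0(), KOKKOS_LAMBDA( const size_t i ) {
      for ( size_t x = 0; x < a.dimension_1(); ++x )
        a(i,x) = i;
    });

    Kokkos::View<const unsigned**> b = a;
    int num_errors = 0;
    // The default int reduction initializes to zero and joins with +=.
    Kokkos::parallel_reduce( b.dimension_0(),
      KOKKOS_LAMBDA( const size_t i, int& local_errors ) {
        for ( size_t x = 0; x < b.dimension_1(); ++x )
          local_errors += ( i == b(i,x) ) ? 0 : 1;
      }, num_errors );
    printf( "errors: %d\n", num_errors );
  }
  Kokkos::finalize();
  return 0;
}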
Example 4: operator()
KOKKOS_INLINE_FUNCTION
void operator() ( const team_member & thread ) const {
  const int i = thread.league_rank();

  // Allocate a shared array for the team.
  shared_1d_int count( thread.team_shmem(), data.dimension_1() );

  // Within each team, run a parallel_for over its threads.
  Kokkos::parallel_for( Kokkos::TeamThreadRange( thread, data.dimension_1() ), [=] (const int& j) {
    int tsum;
    // Run a vector-loop reduction over the inner dimension of data,
    // counting how many values are multiples of 4. Every vector lane
    // gets the same reduction value (tsum) back; it is broadcast to
    // all vector lanes.
    Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( thread, data.dimension_2() ), [=] (const int& k, int& vsum) {
      vsum += ( data(i,j,k) % 4 == 0 ) ? 1 : 0;
    }, tsum );
    // Make sure only one vector lane adds the reduction value to the
    // shared array, i.e. execute the next line only once per thread.
    Kokkos::single( Kokkos::PerThread( thread ), [=] () {
      count(j) = tsum;
    });
  });

  // Wait for all threads to finish the parallel_for so that all
  // shared memory writes are done.
  thread.team_barrier();

  // Check, with one vector lane from each thread, how many consecutive
  // data segments have the same number of values divisible by 4.
  // The team reduction value is again broadcast to every team member
  // (and every vector lane).
  int team_sum = 0;
  Kokkos::parallel_reduce( Kokkos::TeamThreadRange( thread, data.dimension_1() - 1 ), [=] (const int& j, int& thread_sum) {
    // It is not valid to add to thread_sum directly; use a single()
    // with broadcast instead. thread_sum is used as input to the
    // operator (i.e. it initializes sum), and the final value of sum
    // is broadcast to all vector lanes in the thread.
    Kokkos::single( Kokkos::PerThread( thread ), [=] (int& sum) {
      if ( count(j) == count(j+1) ) sum++;
    }, thread_sum );
  }, team_sum );

  // With one thread and one vector lane of the team, add team_sum to
  // the global value.
  Kokkos::single( Kokkos::PerTeam( thread ), [=] () {
    Kokkos::atomic_add( &gsum(), team_sum );
  });
}
Example 5: operator()
KOKKOS_INLINE_FUNCTION
void operator()( size_t i ) const
{
  if ( i < m_elem_node.dimension_0() * m_elem_node.dimension_1() ) {
    const size_t ielem = i / ElemNode ;
    const size_t inode = i % ElemNode ;

    unsigned elem_grid[SpaceDim] ;
    unsigned node_grid[SpaceDim] ;

    m_box_part.uses_elem_coord( ielem , elem_grid );

    enum { elem_node_scale = Order == BoxElemPart::ElemLinear    ? 1 :
                             Order == BoxElemPart::ElemQuadratic ? 2 : 0 };

    node_grid[0] = elem_node_scale * elem_grid[0] + m_elem_node_local[inode][0] ;
    node_grid[1] = elem_node_scale * elem_grid[1] + m_elem_node_local[inode][1] ;
    node_grid[2] = elem_node_scale * elem_grid[2] + m_elem_node_local[inode][2] ;

    m_elem_node(ielem,inode) = m_box_part.local_node_id( node_grid );
  }

  if ( i < m_node_grid.dimension_0() ) {
    unsigned node_grid[SpaceDim] ;
    m_box_part.local_node_coord( i , node_grid );
    m_node_grid(i,0) = node_grid[0] ;
    m_node_grid(i,1) = node_grid[1] ;
    m_node_grid(i,2) = node_grid[2] ;

    m_coord_map( node_grid[0] , node_grid[1] , node_grid[2] ,
                 m_node_coord(i,0) , m_node_coord(i,1) , m_node_coord(i,2) );
  }

  if ( i < m_recv_node.dimension_0() ) {
    m_recv_node(i,0) = m_box_part.recv_node_rank(i);
    m_recv_node(i,1) = m_box_part.recv_node_count(i);
  }

  if ( i < m_send_node.dimension_0() ) {
    m_send_node(i,0) = m_box_part.send_node_rank(i);
    m_send_node(i,1) = m_box_part.send_node_count(i);
  }

  if ( i < m_send_node_id.dimension_0() ) {
    m_send_node_id(i) = m_box_part.send_node_id(i);
  }
}
Example 6: BoxElemFixture
BoxElemFixture( const BoxElemPart::Decompose decompose ,
                const unsigned global_size ,
                const unsigned global_rank ,
                const unsigned elem_nx ,
                const unsigned elem_ny ,
                const unsigned elem_nz ,
                const float bubble_x = 1.1f ,
                const float bubble_y = 1.2f ,
                const float bubble_z = 1.3f )
  : m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz )
  , m_coord_map( m_box_part.global_coord_max(0) ,
                 m_box_part.global_coord_max(1) ,
                 m_box_part.global_coord_max(2) ,
                 bubble_x , bubble_y , bubble_z )
  , m_node_coord( "fixture_node_coord" , m_box_part.uses_node_count() )
  , m_node_grid(  "fixture_node_grid"  , m_box_part.uses_node_count() )
  , m_elem_node(  "fixture_elem_node"  , m_box_part.uses_elem_count() )
  , m_recv_node(  "fixture_recv_node"  , m_box_part.recv_node_msg_count() )
  , m_send_node(  "fixture_send_node"  , m_box_part.send_node_msg_count() )
  , m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() )
{
  {
    const hex_data elem_data ;
    for ( unsigned i = 0 ; i < ElemNode ; ++i ) {
      m_elem_node_local[i][0] = elem_data.eval_map[i][0] ;
      m_elem_node_local[i][1] = elem_data.eval_map[i][1] ;
      m_elem_node_local[i][2] = elem_data.eval_map[i][2] ;
      m_elem_node_local[i][3] = 0 ;
    }
  }

  // One flat index space large enough to cover every view filled by
  // operator(); each branch there guards against its own extent.
  const size_t nwork =
    std::max( m_recv_node.dimension_0() ,
    std::max( m_send_node.dimension_0() ,
    std::max( m_send_node_id.dimension_0() ,
    std::max( m_node_grid.dimension_0() ,
              m_elem_node.dimension_0() * m_elem_node.dimension_1() ))));

  Kokkos::parallel_for( nwork , *this );
}
Example 7: operator()
KOKKOS_INLINE_FUNCTION
void operator()(const int cell) const
{
  for (int i = 0; i < Teuchos::as<int>(local_lids.dimension_1()); i++)
    local_lids(cell,i) = global_lids(cellIds(cell),i);
}
Example 8: modified_gram_schmidt
void modified_gram_schmidt(
  const Kokkos::View< ScalarQ ** ,
                      Kokkos::LayoutLeft ,
                      DeviceType ,
                      Management > & Q ,
  const Kokkos::View< ScalarR ** ,
                      Kokkos::LayoutLeft ,
                      DeviceType ,
                      Management > & R ,
  comm::Machine machine )
{
  const Kokkos::ALL ALL ;

  typedef Kokkos::View< ScalarQ * ,
                        Kokkos::LayoutLeft ,
                        DeviceType ,
                        Kokkos::MemoryUnmanaged > vector_view_type ;

  const typename Kokkos::View< ScalarR ** ,
                               Kokkos::LayoutLeft ,
                               DeviceType >::HostMirror
    hostR = Kokkos::create_mirror_view( R );

  const int length = Q.dimension_0();
  const int count  = Q.dimension_1();

  for ( int j = 0 ; j < count ; ++j ) {
    const vector_view_type Qj = Kokkos::subview< vector_view_type >( Q , ALL , j );

    // reads += length ; writes += 0 ; flops += 1 + 2 * length
    const double norm_Qj = Kokkos::norm2( length , Qj , machine );

    hostR(j,j) = norm_Qj ;

    // reads += length ; writes += length ; flops += 1 + length
    Kokkos::scale( length , 1.0 / norm_Qj , Qj );

    for ( int k = j + 1 ; k < count ; ++k ) {
      const vector_view_type Qk = Kokkos::subview< vector_view_type >( Q , ALL , k );

      // reads += 2 * length ; writes += 0 ; flops += 2 * length
      const double Qj_dot_Qk = Kokkos::dot( length , Qj , Qk , machine );

      hostR(j,k) = Qj_dot_Qk ;

      // reads += 2 * length ; writes += length ; flops += 2 * length
      Kokkos::axpy( length , - Qj_dot_Qk , Qj , Qk );
    }
  }

  // reads += 0 ; writes += count * count
  Kokkos::deep_copy( R , hostR );
}
Example 9: team_shmem_size
// The functor must report how much shared (scratch) memory it requests
// for a given team size; Kokkos queries this before launching the kernel.
size_t team_shmem_size( int team_size ) const {
  return shared_1d_int::shmem_size( data.dimension_1() );
}
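Putting Examples 4 and 9 together, here is a small, self-contained sketch showing how team_shmem_size() interacts with a TeamPolicy dispatch. The functor name ColumnCopy, the view sizes, and the scratch-view typedef are assumptions modeled on the Kokkos tutorials, not from the original source.

#include <Kokkos_Core.hpp>

struct ColumnCopy {
  typedef Kokkos::TeamPolicy<>::member_type team_member;
  typedef Kokkos::View<int*, Kokkos::DefaultExecutionSpace::scratch_memory_space,
                       Kokkos::MemoryUnmanaged> shared_1d_int;

  Kokkos::View<int**> data;

  ColumnCopy( const Kokkos::View<int**>& d ) : data(d) {}

  // Requested per-team scratch: one int per column of data.
  size_t team_shmem_size( int /*team_size*/ ) const {
    return shared_1d_int::shmem_size( data.dimension_1() );
  }

  KOKKOS_INLINE_FUNCTION
  void operator()( const team_member& thread ) const {
    const int i = thread.league_rank();
    // Allocate the shared array out of the team's scratch memory.
    shared_1d_int count( thread.team_shmem(), data.dimension_1() );
    // The team's threads cooperatively fill the shared array.
    Kokkos::parallel_for( Kokkos::TeamThreadRange( thread, (int) data.dimension_1() ),
      [=]( const int j ) { count(j) = data(i,j); } );
    thread.team_barrier();
  }
};

int main( int argc, char* argv[] ) {
  Kokkos::initialize( argc, argv );
  {
    Kokkos::View<int**> data( "data", 128, 16 );
    // One team per row; Kokkos consults team_shmem_size() at this launch.
    Kokkos::parallel_for( Kokkos::TeamPolicy<>( data.dimension_0(), Kokkos::AUTO ),
                          ColumnCopy( data ) );
    Kokkos::fence();
  }
  Kokkos::finalize();
  return 0;
}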