本文整理汇总了 C++ 中 Plan::GetLog2SubclusterSize 方法的典型用法代码示例。如果您正苦于以下问题:C++ Plan::GetLog2SubclusterSize 方法的具体用法?C++ Plan::GetLog2SubclusterSize 怎么用?C++ Plan::GetLog2SubclusterSize 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Plan 的用法示例。
在下文中一共展示了 Plan::GetLog2SubclusterSize 方法的 1 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: weightGridList
//.........这里部分代码省略.........
for( size_t j=0; j<d; ++j )
{
globalA[j] = A[j] +
(myTBoxCoords[j]<<log2LocalTBoxesPerDim[j]);
x0Ap[j] = tBox.offsets[j] + (globalA[j]|1)*wA[j];
ARelativeToAp |= (globalA[j]&1)<<j;
}
#ifdef TIMING
bfly::L2LTimer.Start();
#endif
bfly::L2L
( context, plan, phase, level,
ARelativeToAp, x0A, x0Ap, p0B, wA, wB,
parentIOffset, weightGridList,
partialWeightGridList[tIndex] );
#ifdef TIMING
bfly::L2LTimer.Stop();
#endif
}
}
// Scatter the summation of the weights
#ifdef TIMING
bfly::sumScatterTimer.Start();
#endif
const size_t recvSize = 2*weightGridList.Length()*q_to_d;
// Currently two types of planned communication are supported, as
// they are the only required types for transforming and inverting
// the transform:
// 1) partitions of dimensions 0 -> c
// 2) partitions of dimensions c -> d-1
// Both 1 and 2 include partitioning 0 -> d-1, but, in general,
// the second category never requires packing.
const size_t log2SubclusterSize = plan.GetLog2SubclusterSize(level);
if( log2SubclusterSize == 0 )
{
MPI_Comm clusterComm = plan.GetClusterComm( level );
SumScatter
( partialWeightGridList.Buffer(), weightGridList.Buffer(),
recvSize, clusterComm );
}
else
{
const size_t log2NumSubclusters =
log2NumMerging-log2SubclusterSize;
const size_t numSubclusters = 1u<<log2NumSubclusters;
const size_t subclusterSize = 1u<<log2SubclusterSize;
const size_t numChunksPerProcess = subclusterSize;
const size_t chunkSize = recvSize / numChunksPerProcess;
const R* partialBuffer = partialWeightGridList.Buffer();
vector<R> sendBuffer( recvSize<<log2NumMerging );
for( size_t sc=0; sc<numSubclusters; ++sc )
{
R* subclusterSendBuffer =
&sendBuffer[sc*subclusterSize*recvSize];
const R* subclusterPartialBuffer =
&partialBuffer[sc*subclusterSize*recvSize];
for( size_t p=0; p<subclusterSize; ++p )
{
R* processSend = &subclusterSendBuffer[p*recvSize];
for( size_t c=0; c<numChunksPerProcess; ++c )
{
memcpy
( &processSend[c*chunkSize],
&subclusterPartialBuffer