本文整理汇总了 C++ 中 Workspace::transfer 方法的典型用法代码示例。如果您正苦于以下问题:C++ Workspace::transfer 方法的具体用法是什么?C++ Workspace::transfer 怎么用?有哪些 C++ Workspace::transfer 的使用例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Workspace 的用法示例。
在下文中一共展示了 Workspace::transfer 方法的 1 个代码示例,示例默认按受欢迎程度排序。您可以为喜欢或者觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 C++ 代码示例。
示例1: GPUQREngine(出错时调用 GPUQREngine_Cleanup 进行清理)
QREngineResultCode GPUQREngine
(
size_t gpuMemorySize, // The total available GPU memory size in bytes
Front *userFronts, // The list of fronts to factorize
Int numFronts, // The number of fronts to factorize
QREngineStats *stats // An optional parameter. If present, statistics
// are collected and passed back to the caller
// via this struct
)
{
/* Allocate workspaces */
Front *fronts = (Front*) SuiteSparse_calloc(numFronts, sizeof(Front));
if(!fronts)
{
return QRENGINE_OUTOFMEMORY;
}
size_t FSize, RSize;
FSize = RSize = 0;
for(int f=0; f<numFronts; f++)
{
/* Configure the front */
Front *userFront = &(userFronts[f]);
Int m = userFront->fm;
Int n = userFront->fn;
Front *front = new (&fronts[f]) Front(f, EMPTY, m, n);
FSize += front->getNumFrontValues();
RSize += front->getNumRValues();
}
// We have to allocate page-locked CPU-GPU space to leverage asynchronous
// memory transfers. This has to be done in a way that the CUDA driver is
// aware of, which unfortunately means making a copy of the user input.
// calloc pagelocked space on CPU, and calloc space on the GPU
Workspace *wsMongoF = Workspace::allocate(FSize, // CPU and GPU
sizeof(double), true, true, true, true);
// calloc pagelocked space on the CPU. Nothing on the GPU
Workspace *wsMongoR = Workspace::allocate(RSize, // CPU
sizeof(double), true, true, false, true);
/* Cleanup and return if we ran out of memory. */
if(!wsMongoF || !wsMongoR)
{
return GPUQREngine_Cleanup (QRENGINE_OUTOFMEMORY,
userFronts, fronts, numFronts, wsMongoF, wsMongoR);
}
/* Prepare the fronts for GPU execution. */
size_t FOffset, ROffset;
FOffset = ROffset = 0;
for(int f=0; f<numFronts; f++)
{
// Set the front pointers; make the copy from user data into front data.
Front *front = &(fronts[f]);
front->F = CPU_REFERENCE(wsMongoF, double*) + FOffset;
front->gpuF = GPU_REFERENCE(wsMongoF, double*) + FOffset;
front->cpuR = CPU_REFERENCE(wsMongoR, double*) + ROffset;
FOffset += front->getNumFrontValues();
ROffset += front->getNumRValues();
/* COPY USER DATA (user's F to our F) */
Front *userFront = &(userFronts[f]);
double *userF = userFront->F;
double *F = front->F;
Int m = userFront->fm;
Int n = userFront->fn;
bool isColMajor = userFront->isColMajor;
Int ldn = userFront->ldn;
for(Int i=0; i<m; i++)
{
for(Int j=0; j<n; j++)
{
F[i*n+j] = (isColMajor ? userF[j*ldn+i] : userF[i*ldn+j]);
}
}
/* Attach either the user-specified Stair, or compute it. */
front->Stair = userFront->Stair;
if(!front->Stair) front->Stair = GPUQREngine_FindStaircase(front);
/* Cleanup and return if we ran out of memory building the staircase */
if(!front->Stair)
{
return GPUQREngine_Cleanup (QRENGINE_OUTOFMEMORY,
userFronts, fronts, numFronts, wsMongoF, wsMongoR);
}
}
/* Transfer the fronts to the GPU. */
if(!wsMongoF->transfer(cudaMemcpyHostToDevice))
{
return GPUQREngine_Cleanup (QRENGINE_GPUERROR,
userFronts, fronts, numFronts, wsMongoF, wsMongoR);
}
/* Do the factorization for this set of fronts. */
QREngineResultCode result = GPUQREngine_Internal(gpuMemorySize, fronts,
numFronts, NULL, NULL, NULL, stats);
//.........这里部分代码省略.........