本文整理汇总了C++中OpenCLContext类的典型用法代码示例。如果您正苦于以下问题:C++ OpenCLContext类的具体用法?C++ OpenCLContext怎么用?C++ OpenCLContext使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OpenCLContext类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: create
/**
 * (Re)create this 2D image from an AlloArray and register it with a context.
 *
 * Whatever this object held before is released via destroy()/detach().
 * The image width is taken from dim[0]; for arrays with at least two
 * dimensions the height is dim[1] and the row pitch is stride[1], otherwise
 * the height is 1 and the row pitch is allo_array_size(array).
 *
 * @param ctx    context that creates the image and that this object is attached to
 * @param usage  requested memory flags; passed through
 *               OpenCLMemoryBuffer::check_memory_flags together with the host pointer
 * @param array  source array providing layout, format and host data pointer
 *
 * If opencl_error() reports a failure for the creation status, the function
 * returns without storing a handle or attaching to the context.
 */
void OpenCLImage2D :: create(
    OpenCLContext &ctx,
    cl_mem_flags usage,
    AlloArray *array
) {
    // Start from a clean slate.
    destroy();
    detach();

    usage = OpenCLMemoryBuffer::check_memory_flags(usage, array->data.ptr);

    // Derive the image geometry from the array header.
    size_t width = array->header.dim[0];
    size_t height = 1;
    size_t rowstride;
    if(array->header.dimcount >= 2) {
        height = array->header.dim[1];
        rowstride = array->header.stride[1];
    }
    else {
        rowstride = allo_array_size(array);
    }

    cl_image_format format = OpenCLImageFormat::format_from_array(array);

    cl_int status = CL_SUCCESS;
    cl_mem image = clCreateImage2D(
        ctx.get_context(),
        usage,
        &format,
        width,
        height,
        rowstride,
        array->data.ptr,
        &status
    );

    // Only adopt the handle when creation succeeded.
    if(opencl_error(status, "clCreateImage2D error creating buffer")) {
        return;
    }
    mMem = image;
    ctx.attach_resource(this);
}
示例2: compute
/**
 * Execute an interleaved clFFT transform from `input` into `output` using the
 * singleton OpenCL context and its command queue.
 *
 * @param input      source chunk; accessed read-only on the device
 * @param output     destination chunk; accessed read/write on the device
 * @param direction  transform direction, forwarded to clFFT as clFFT_Direction
 *
 * @throws std::runtime_error if the plan cannot be obtained or the execution
 *         call does not return CL_SUCCESS.
 */
void FftClFft::
compute( Tfr::ChunkData::Ptr input, Tfr::ChunkData::Ptr output, FftDirection direction )
{
    TIME_STFT TaskTimer tt("Fft ClFft");

    unsigned n = input->getNumberOfElements().width;
    unsigned N = output->getNumberOfElements().width;

    // Input and output widths must agree unless the direction is -1.
    if (direction != -1)
        EXCEPTION_ASSERT( n == N );

    {
        TIME_STFT TaskTimer tt("Computing fft(N=%u, n=%u, direction=%d)", N, n, direction);

        OpenCLContext &clContext = OpenCLContext::Singleton();

        // Fetch the plan for this transform length from the shared plan buffer.
        cl_int status;
        clFFT_Plan fftPlan = CLFFTKernelBuffer::Singleton().getPlan(clContext.getContext(), n, status);
        if (CL_SUCCESS != status)
            throw std::runtime_error("Could not create clFFT compute plan.");

        // Launch the transform. The kernel must be able to both read and
        // write the output storage, hence ReadWrite for `output`.
        status = clFFT_ExecuteInterleaved(
                clContext.getCommandQueue(),
                fftPlan, 1, (clFFT_Direction)direction,
                OpenClMemoryStorage::ReadOnly<1>( input ).ptr(),
                OpenClMemoryStorage::ReadWrite<1>( output ).ptr(),
                0, NULL, NULL );
        if (CL_SUCCESS != status)
            throw std::runtime_error("Bad stuff happened during FFT computation.");
    }
}
示例3: create
/**
 * (Re)create the command queue for a device within a context.
 *
 * Any queue previously held is released via destroy()/detach() first.
 *
 * @param ctx        context that owns the queue; this object is attached to it on success
 * @param dev        device the queue is created for
 * @param ordered    when false, out-of-order execution is requested if the
 *                   device's queue properties advertise it; otherwise a
 *                   USER_OPENCL_ERROR is reported and the queue stays in-order
 * @param profiling  when true, CL_QUEUE_PROFILING_ENABLE is set
 *
 * If opencl_error() reports a failure for the creation status, nothing is
 * stored and the object is not attached.
 */
void OpenCLCommandQueue :: create(OpenCLContext &ctx, const OpenCLDevice &dev, bool ordered, bool profiling) {
    destroy();
    detach();

    // Assemble the queue property bitfield.
    cl_command_queue_properties queue_props = 0;
    if(profiling) {
        queue_props |= CL_QUEUE_PROFILING_ENABLE;
    }
    if(!ordered) {
        const bool device_can_reorder =
            GET_FLAG(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, dev.get_queue_properties());
        if(device_can_reorder) {
            queue_props |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        }
        else {
            opencl_error(USER_OPENCL_ERROR, "Device doesn't support out of order execution ... disabling");
        }
    }

    cl_int status = CL_SUCCESS;
    cl_command_queue created = clCreateCommandQueue(
        ctx.get_context(),
        dev.get_device(),
        queue_props,
        &status
    );
    if(opencl_error(status, "clCreateCommandQueue error creating command queue")) {
        return;
    }

    mCommandQueue = created;
    ctx.attach_resource(this);
}
示例4: context
/**
 * Build the sorting kernels for the given trait, choose work-group sizes, and
 * allocate the workspace arrays needed for sorting `length` elements.
 *
 * @param context  OpenCL context used to compile the program and create arrays
 * @param trait    supplies the type names, key expressions and element sizes
 *                 substituted into the kernel source
 * @param length   number of elements that will be sorted
 *
 * When the data fits in the local buffer (isShortList) no workspace arrays are
 * allocated. Otherwise five arrays are allocated; if any allocation throws,
 * the ones already created are released before the exception propagates
 * (the destructor does not run for a partially constructed object).
 */
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait),
        dataRange(NULL), bucketOfElement(NULL), offsetInBucket(NULL), bucketOffset(NULL), buckets(NULL), dataLength(length) {
    // Create kernels.
    std::map<std::string, std::string> replacements;
    replacements["DATA_TYPE"] = trait->getDataType();
    replacements["KEY_TYPE"] = trait->getKeyType();
    replacements["SORT_KEY"] = trait->getSortKey();
    replacements["MIN_KEY"] = trait->getMinKey();
    replacements["MAX_KEY"] = trait->getMaxKey();
    replacements["MAX_VALUE"] = trait->getMaxValue();
    replacements["VALUE_IS_INT2"] = (trait->getDataType() == std::string("int2") ? "1" : "0");
    cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::sort, replacements));
    shortListKernel = cl::Kernel(program, "sortShortList");
    computeRangeKernel = cl::Kernel(program, "computeRange");
    assignElementsKernel = cl::Kernel(program, "assignElementsToBuckets");
    computeBucketPositionsKernel = cl::Kernel(program, "computeBucketPositions");
    copyToBucketsKernel = cl::Kernel(program, "copyDataToBuckets");
    sortBucketsKernel = cl::Kernel(program, "sortBuckets");

    // Work out the work group sizes for various kernels.
    unsigned int maxGroupSize = std::min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
    int maxSharedMem = context.getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
    unsigned int maxLocalBuffer = (unsigned int) ((maxSharedMem/trait->getDataSize())/2);
    unsigned int maxRangeSize = std::min(maxGroupSize, (unsigned int) computeRangeKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()));
    unsigned int maxPositionsSize = std::min(maxGroupSize, (unsigned int) computeBucketPositionsKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()));
    unsigned int maxShortListSize = shortListKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice());
    // On Qualcomm's OpenCL, it's essential to check against maxShortListSize.  Otherwise you get a crash.
    // But AMD's OpenCL returns an inappropriately small value for it that is much shorter than the actual
    // maximum, so including the check hurts performance.  For the moment I'm going to just comment it out.
    // If we officially support Qualcomm in the future, we'll need to do something better.
    isShortList = (length <= maxLocalBuffer/* && length < maxShortListSize*/);

    // rangeKernelSize becomes the largest power of two not exceeding maxRangeSize.
    for (rangeKernelSize = 1; rangeKernelSize*2 <= maxRangeSize; rangeKernelSize *= 2)
        ;
    positionsKernelSize = std::min(rangeKernelSize, maxPositionsSize);
    sortKernelSize = (isShortList ? rangeKernelSize : rangeKernelSize/2);
    if (rangeKernelSize > length)
        rangeKernelSize = length;
    if (sortKernelSize > maxLocalBuffer)
        sortKernelSize = maxLocalBuffer;

    // NOTE(review): targetBucketSize is 0 whenever sortKernelSize < 2, in which
    // case the division below divides by zero. Confirm that callers/devices
    // can never produce that configuration.
    unsigned int targetBucketSize = sortKernelSize/2;
    unsigned int numBuckets = length/targetBucketSize;
    if (numBuckets < 1)
        numBuckets = 1;
    if (positionsKernelSize > numBuckets)
        positionsKernelSize = numBuckets;

    // Create workspace arrays.
    if (!isShortList) {
        try {
            dataRange = new OpenCLArray(context, 2, trait->getKeySize(), "sortDataRange");
            bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
            bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
            offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
            buckets = new OpenCLArray(context, length, trait->getDataSize(), "buckets");
        }
        catch (...) {
            // An exception leaving a constructor skips the destructor, so
            // free whatever was allocated so far (pointers not yet assigned
            // are still NULL, and deleting NULL is a no-op).
            delete dataRange;
            delete bucketOffset;
            delete bucketOfElement;
            delete offsetInBucket;
            delete buckets;
            throw;
        }
    }
}
示例5: SHA512_Init
/**
 * Run the collision search for `message` on the device selected by device_num.
 *
 * The host prepares a SHA-512 context over (4 zero bytes || 32-byte message),
 * uploads the context's block buffer, runs the hash-table cleanup kernel and
 * the "kernel_sha512" kernel, and reads back the collision count and the
 * collision buffer.
 *
 * @param message          pointer to 32 bytes of input
 * @param collisions       host buffer receiving getCollisionCeiling() entries
 * @param collision_count  reset to 0 on entry, uploaded as the initial device
 *                         counter, and overwritten with the value read back
 */
void OpenCLMomentumV9::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
    // The device-side counter is seeded from this value below.
    *collision_count = 0;

    SHA512_Context c512_avxsse;
    SHA512_Init(&c512_avxsse);

    // midhash = 4 zero bytes followed by the 32-byte message. The array is
    // zero-initialised rather than storing through a uint32_t* cast, which
    // was an unaligned/aliasing-unsafe write into a uint8_t array.
    uint8_t midhash[32+4] = {0};
    memcpy(midhash+4, message, 32);
    SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
    SHA512_PreFinal(&c512_avxsse);

    // Clear the first 4 bytes of the block buffer, then apply SWAP64 to
    // 64-bit words 1..4 of it before it is uploaded.
    *(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
    uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
    for (int i = 1; i < 5; i++) {
        swap_helper[i] = SWAP64(swap_helper[i]);
    }

    OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
    OpenCLProgram *program = context->getProgram(0);

    OpenCLKernel *kernel = program->getKernel("kernel_sha512");
    OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");

    // Both kernels are dereferenced below, so check both.
    assert(kernel != NULL);
    assert(kernel_cleanup != NULL);

    size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
    //has to be a power of 2
    BLOCKSIZE = 1<<log2(BLOCKSIZE);
    size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
    BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);

    // cleans up the hash table
    queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);

    queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
    queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));

    queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);

    queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
    queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());

    // Wait for everything enqueued above to complete before returning.
    queue->finish();
}
示例6: context
/**
 * Build the sorting kernels for the given trait, choose work-group sizes, and
 * allocate the workspace arrays needed for sorting `length` elements.
 *
 * @param context  OpenCL context used to compile the program and create arrays
 * @param trait    supplies the type names, key expressions and element sizes
 *                 substituted into the kernel source
 * @param length   number of elements that will be sorted
 *
 * When the data fits in the local buffer (isShortList) no workspace arrays are
 * allocated. Otherwise five arrays are allocated; if any allocation throws,
 * the ones already created are released before the exception propagates
 * (the destructor does not run for a partially constructed object).
 */
OpenCLSort::OpenCLSort(OpenCLContext& context, SortTrait* trait, unsigned int length) : context(context), trait(trait),
        dataRange(NULL), bucketOfElement(NULL), offsetInBucket(NULL), bucketOffset(NULL), buckets(NULL), dataLength(length) {
    // Create kernels.
    std::map<std::string, std::string> replacements;
    replacements["DATA_TYPE"] = trait->getDataType();
    replacements["KEY_TYPE"] = trait->getKeyType();
    replacements["SORT_KEY"] = trait->getSortKey();
    replacements["MIN_KEY"] = trait->getMinKey();
    replacements["MAX_KEY"] = trait->getMaxKey();
    replacements["MAX_VALUE"] = trait->getMaxValue();
    replacements["VALUE_IS_INT2"] = (trait->getDataType() == std::string("int2") ? "1" : "0");
    cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::sort, replacements));
    shortListKernel = cl::Kernel(program, "sortShortList");
    computeRangeKernel = cl::Kernel(program, "computeRange");
    assignElementsKernel = cl::Kernel(program, "assignElementsToBuckets");
    computeBucketPositionsKernel = cl::Kernel(program, "computeBucketPositions");
    copyToBucketsKernel = cl::Kernel(program, "copyDataToBuckets");
    sortBucketsKernel = cl::Kernel(program, "sortBuckets");

    // Work out the work group sizes for various kernels.
    unsigned int maxGroupSize = std::min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
    int maxSharedMem = context.getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
    unsigned int maxLocalBuffer = (unsigned int) ((maxSharedMem/trait->getDataSize())/2);
    isShortList = (length <= maxLocalBuffer);

    // rangeKernelSize becomes the largest power of two not exceeding maxGroupSize.
    for (rangeKernelSize = 1; rangeKernelSize*2 <= maxGroupSize; rangeKernelSize *= 2)
        ;
    positionsKernelSize = rangeKernelSize;
    sortKernelSize = (isShortList ? rangeKernelSize : rangeKernelSize/2);
    if (rangeKernelSize > length)
        rangeKernelSize = length;
    if (sortKernelSize > maxLocalBuffer)
        sortKernelSize = maxLocalBuffer;

    // NOTE(review): targetBucketSize is 0 whenever sortKernelSize < 2, in which
    // case the division below divides by zero. Confirm that callers/devices
    // can never produce that configuration.
    unsigned int targetBucketSize = sortKernelSize/2;
    unsigned int numBuckets = length/targetBucketSize;
    if (numBuckets < 1)
        numBuckets = 1;
    if (positionsKernelSize > numBuckets)
        positionsKernelSize = numBuckets;

    // Create workspace arrays.
    if (!isShortList) {
        try {
            dataRange = new OpenCLArray(context, 2, trait->getKeySize(), "sortDataRange");
            bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
            bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
            offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
            buckets = new OpenCLArray(context, length, trait->getDataSize(), "buckets");
        }
        catch (...) {
            // An exception leaving a constructor skips the destructor, so
            // free whatever was allocated so far (pointers not yet assigned
            // are still NULL, and deleting NULL is a no-op).
            delete dataRange;
            delete bucketOffset;
            delete bucketOfElement;
            delete offsetInBucket;
            delete buckets;
            throw;
        }
    }
}
示例7: OpenMMException
/**
 * Allocate a device buffer holding `size` elements of `elementSize` bytes each.
 *
 * @param context      context in which the buffer is created
 * @param size         number of elements
 * @param elementSize  size of one element in bytes
 * @param name         label used in error messages
 * @param flags        memory flags passed to the cl::Buffer constructor
 *
 * @throws OpenMMException if cl::Buffer creation raises cl::Error; the message
 *         contains the array name, the error text and the numeric error code.
 */
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) :
        context(context), size(size), elementSize(elementSize), name(name), ownsBuffer(true) {
    try {
        // Widen before multiplying so the byte count is not computed in
        // (overflow-prone) int arithmetic.
        buffer = new cl::Buffer(context.getContext(), flags, static_cast<size_t>(size)*elementSize);
    }
    catch (const cl::Error& err) {
        // Caught by const reference: no copy, no slicing.
        std::stringstream str;
        str<<"Error creating array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
示例8: printf
/**
 * Set up the V3 momentum miner on one OpenCL device: load and build the
 * program, create the command queue and device buffers, and bind the kernel
 * arguments.
 *
 * @param _HASH_BITS   log2 of the number of uint32_t entries in the device hash table
 * @param _device_num  index of the device to use; must be smaller than the
 *                     number of devices reported by OpenCLMain
 */
OpenCLMomentumV3::OpenCLMomentumV3(int _HASH_BITS, int _device_num) {
    max_threads = 1<<30; // very big
    HASH_BITS = _HASH_BITS;
    device_num = _device_num;

    OpenCLMain& main = OpenCLMain::getInstance();

    // checks if device exists
    if (main.getInstance().getNumDevices() <= device_num) {
        printf("ERROR: DEVICE %d does not exist. Please limit your threads to one per device.\n", device_num);
        assert(false);
    }

    // compiles
    fprintf(stdout, "Starting OpenCLMomentum V3\n");
    fprintf(stdout, "Device %02d: %s\n", device_num, main.getDevice(device_num)->getName().c_str());
    cl_ulong maxWorkGroupSize = main.getDevice(device_num)->getMaxWorkGroupSize();
    // cl_ulong is not guaranteed to be `unsigned long long`; cast so the
    // argument matches the %llu conversion on every platform.
    fprintf(stdout, "Max work group size: %llu\n", static_cast<unsigned long long>(maxWorkGroupSize));

    // Never use more threads than the device's maximum work-group size.
    if (maxWorkGroupSize < max_threads) max_threads = maxWorkGroupSize;

    OpenCLContext *context = main.getDevice(device_num)->getContext();
    std::vector<std::string> program_filenames;
    program_filenames.push_back("opencl/opencl_cryptsha512.h");
    program_filenames.push_back("opencl/cryptsha512_kernel.cl");
    program_filenames.push_back("opencl/OpenCLMomentumV3.cl");
    OpenCLProgram *program = context->loadProgramFromFiles(program_filenames);

    // prealoc kernels
    OpenCLKernel *kernel = program->getKernel("kernel_sha512");
    OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");

    // only one queue, helps with memory leaking
    queue = context->createCommandQueue(main.getDevice(device_num));

    // allocate internal structure
    cl_message = context->createBuffer(sizeof(uint8_t)*32, CL_MEM_READ_ONLY, NULL);
    internal_hash_table = context->createBuffer(sizeof(uint32_t)*(1<<HASH_BITS), CL_MEM_READ_WRITE, NULL);
    temp_collisions = context->createBuffer(sizeof(collision_struct)*getCollisionCeiling(), CL_MEM_WRITE_ONLY, NULL);
    temp_collisions_count = context->createBuffer(sizeof(size_t), CL_MEM_READ_WRITE, NULL);

    // sets args
    kernel_cleanup->resetArgs();
    kernel_cleanup->addGlobalArg(internal_hash_table);

    kernel->resetArgs();
    kernel->addGlobalArg(cl_message);
    kernel->addGlobalArg(internal_hash_table);
    uint32_t ht_size = 1<<HASH_BITS;
    kernel->addScalarUInt(ht_size);
    kernel->addGlobalArg(temp_collisions);
    kernel->addGlobalArg(temp_collisions_count);
}
示例9: sizeof
void OpenCLMomentumV3::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
// temp storage
*collision_count = 0;
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel = program->getKernel("kernel_sha512");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
assert(kernel != NULL);
//size_t BLOCKSIZE = main.getPlatform(0)->getDevice(0)->getMaxWorkGroupSize();
size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
//has to be a power of 2
BLOCKSIZE = 1<<log2(BLOCKSIZE);
size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);
// printf("BLOCKSIZE = %ld\n", BLOCKSIZE);
// printf("BLOCKSIZE_CLEAN = %ld\n", BLOCKSIZE_CLEAN);
// cleans up the hash table
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);
queue->enqueueWriteBuffer(cl_message, message, sizeof(uint8_t)*32);
queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);
queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());
queue->finish();
}
示例10: context
/**
 * Initialise the nonbonded utilities and pick the force-kernel launch
 * configuration (number of thread blocks, block size, number of force
 * buffers) from the device type, SIMD width and 64-bit global atomics support.
 *
 * @param context  the OpenCL context these utilities belong to
 */
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false), usePadding(true),
        numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
        interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
        oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0) {
    // Decide how many thread blocks and force buffers to use.

    deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
    if (deviceIsCpu) {
        // CPU: blocks of a single thread, one force buffer per block.
        forceThreadBlockSize = 1;
        numForceThreadBlocks = context.getNumThreadBlocks();
        numForceBuffers = numForceThreadBlocks;
        return;
    }

    const int simdWidth = context.getSIMDWidth();
    const bool has64BitAtomics = context.getSupports64BitGlobalAtomics();

    if (simdWidth == 32) {
        // 4 blocks per compute unit with 64-bit atomics, 3 without.
        numForceThreadBlocks = (has64BitAtomics ? 4 : 3)*context.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
        forceThreadBlockSize = 256;
    }
    else {
        numForceThreadBlocks = context.getNumThreadBlocks();
        forceThreadBlockSize = (simdWidth >= 32 ? OpenCLContext::ThreadBlockSize : 32);
    }

    if (has64BitAtomics) {
        // Even though using longForceBuffer, still need a single forceBuffer for the reduceForces
        // kernel to convert the long results into float4 which will be used by later kernels.
        numForceBuffers = 1;
    }
    else {
        numForceBuffers = numForceThreadBlocks*forceThreadBlockSize/OpenCLContext::TileSize;
    }
}
示例11: SHA512_Init
// Runs the V8 collision search for `message` on the device selected by
// device_num. The visible part of the function:
//   1. resets *out_count and prepares a SHA-512 context over
//      (4 zero bytes || 32-byte message) on the host,
//   2. enqueues the hash-table cleanup kernel and uploads the context's
//      block buffer to cl_message,
//   3. enqueues "calculate_all_hashes", "fill_table" and "find_collisions",
//   4. enqueues read-backs of the collision count into out_count and of
//      getCollisionCeiling() entries into out_buff.
// NOTE(review): the remainder of this function is not visible here (the
// listing is truncated after the read-backs), so anything past step 4 -
// including whether the queue is waited on - is not documented.
void OpenCLMomentumV8::find_collisions(uint8_t* message, collision_struct* out_buff, size_t* out_count) {
// temp storage
*out_count = 0;
// NOTE(review): ht_size is not used in the visible part of this function.
uint32_t ht_size = 1<<HASH_BITS;
SHA512_Context c512_avxsse;
SHA512_Init(&c512_avxsse);
// midhash = 4 bytes (zeroed below) followed by the 32-byte message.
uint8_t midhash[32+4];
memcpy(midhash+4, message, 32);
*((uint32_t*)midhash) = 0;
SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
SHA512_PreFinal(&c512_avxsse);
// Clear the first 4 bytes of the context's block buffer, then apply SWAP64
// to 64-bit words 1..4 of it before it is uploaded.
*(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
for (int i = 1; i < 5; i++) {
swap_helper[i] = SWAP64(swap_helper[i]);
}
// Look up the program (index 0) and the four kernels used by this search.
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel_calculate_all_hashes = program->getKernel("calculate_all_hashes");
OpenCLKernel *kernel_fill_table = program->getKernel("fill_table");
OpenCLKernel *kernel_find_collisions = program->getKernel("find_collisions");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
OpenCLDevice * device = OpenCLMain::getInstance().getDevice(device_num);
// cleans up the hash table
// Each work-group size below is passed through 1<<log2(...) before use
// (presumably rounding down to a power of two - confirm against log2's definition).
size_t kc_wgsize = kernel_cleanup->getWorkGroupSize(device);
kc_wgsize = 1<<log2(kc_wgsize);
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, kc_wgsize);
// printf("Cleaning the HT\n");
// queue->finish();
// Upload the prepared SHA-512 block buffer.
queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
// step 1, calculate hashes
size_t kcah_wgsize = kernel_calculate_all_hashes->getWorkGroupSize(device);
kcah_wgsize = 1<<log2(kcah_wgsize);
queue->enqueueKernel1D(kernel_calculate_all_hashes, MAX_MOMENTUM_NONCE/8,
kcah_wgsize);
// Disabled debugging aid: reads the hashes back and compares them with a
// host-side SHA-512 reference.
// uint64_t * apa = new uint64_t[MAX_MOMENTUM_NONCE];
// queue->enqueueReadBuffer(hashes, apa, sizeof(uint64_t)*MAX_MOMENTUM_NONCE);
// queue->finish();
//
// printf("testing hashes\n");
// uint64_t count = 0;
// for (int i = 0; i < MAX_MOMENTUM_NONCE; i++) {
// if (apa[i] == 0) {
// count++;
// printf("BAD HASH AT: %d %X\n", i, apa[i]);
// }
// }
// printf("counted %X bad hashes\n", count);
// printf("NOW REALLY TEST THEM hashes\n");
// count = 0;
// for (uint32_t i = 0; i < MAX_MOMENTUM_NONCE/8; i+=8) {
// sph_sha512_context c512_sph; //SPH
// sph_sha512_init(&c512_sph);
// sph_sha512(&c512_sph, &i, 4);
// sph_sha512(&c512_sph, message, 32);
// uint64_t out[8];
// sph_sha512_close(&c512_sph, out);
// for (int j =0; j < 8; j++) {
// if (apa[i+j] != out[j]) {
// count++;
// uint64_t xxx = apa[i+j];
// printf("BAD HASH AT: %d => %X != %X\n", i, apa[i+j], out[j]);
// }
// }
// }
// printf("counted %X bad hashes\n", count);
// step 2, populate hashtable
size_t kft_wgsize = kernel_fill_table->getWorkGroupSize(device);
kft_wgsize = 1<<log2(kft_wgsize);
queue->enqueueKernel1D(kernel_fill_table, MAX_MOMENTUM_NONCE,
kft_wgsize);
// printf("step 2, populate hashtable\n");
// queue->finish();
// Seed the device-side collision counter with *out_count (set to 0 above).
queue->enqueueWriteBuffer(collisions_count, out_count, sizeof(size_t));
// step 3, find collisions
size_t kfc_wgsize = kernel_find_collisions->getWorkGroupSize(device);
kfc_wgsize = 1<<log2(kfc_wgsize);
queue->enqueueKernel1D(kernel_find_collisions, MAX_MOMENTUM_NONCE,
kfc_wgsize);
// printf("step 3, find collisions\n");
// queue->finish();
// Read back the collision count and the collision entries.
queue->enqueueReadBuffer(collisions_count, out_count, sizeof(size_t));
queue->enqueueReadBuffer(collisions, out_buff, sizeof(collision_struct)*getCollisionCeiling());
// printf("step 4, copy output\n");
//.........这里部分代码省略.........
示例12: setInvPeriodicBoxSizeArg
/**
 * Bind the inverse periodic box size to one kernel argument, using the
 * double-precision value when the context runs in double precision and the
 * single-precision value otherwise.
 *
 * @param cl      context providing the precision mode and box values
 * @param kernel  kernel whose argument is set
 * @param index   argument index to set
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
示例13: context
/**
 * Compile the compaction program, create its two kernels and allocate the
 * per-thread-block counter array.
 *
 * @param context  OpenCL context used for compilation and allocation
 *
 * The counter array is held through a raw pointer, so it is allocated last:
 * if program or kernel creation throws, nothing has been allocated yet and
 * nothing leaks (the destructor does not run when a constructor throws).
 */
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) {
    cl::Program program = context.createProgram(OpenCLKernelSources::compact);
    countKernel = cl::Kernel(program, "countElts");
    moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");

    // One cl_uint counter per thread block.
    dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
}
示例14: setPeriodicBoxArgs
/**
 * Bind the five periodic box quantities to consecutive kernel arguments,
 * starting at `index`, in this order: box size, inverse box size, box vector
 * X, box vector Y, box vector Z. Double-precision values are used when the
 * context runs in double precision, single-precision values otherwise.
 *
 * @param cl      context providing the precision mode and box values
 * @param kernel  kernel whose arguments are set
 * @param index   index of the first of the five arguments
 */
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        kernel.setArg<mm_float4>(index+1, cl.getInvPeriodicBoxSize());
        kernel.setArg<mm_float4>(index+2, cl.getPeriodicBoxVecX());
        kernel.setArg<mm_float4>(index+3, cl.getPeriodicBoxVecY());
        kernel.setArg<mm_float4>(index+4, cl.getPeriodicBoxVecZ());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
    kernel.setArg<mm_double4>(index+1, cl.getInvPeriodicBoxSizeDouble());
    kernel.setArg<mm_double4>(index+2, cl.getPeriodicBoxVecXDouble());
    kernel.setArg<mm_double4>(index+3, cl.getPeriodicBoxVecYDouble());
    kernel.setArg<mm_double4>(index+4, cl.getPeriodicBoxVecZDouble());
}