本文整理汇总了C++中OpenCLProgram::getKernel方法的典型用法代码示例。如果您正苦于以下问题:C++ OpenCLProgram::getKernel方法的具体用法?C++ OpenCLProgram::getKernel怎么用?C++ OpenCLProgram::getKernel使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类OpenCLProgram的用法示例。
在下文中一共展示了OpenCLProgram::getKernel方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: OpenCLMomentumV3::OpenCLMomentumV3
/**
 * Sets up the V3 momentum solver on a single OpenCL device.
 *
 * Loads the program from three source files, fetches the two kernels, creates
 * one command queue and the device buffers, and binds the kernel arguments
 * once so that find_collisions() only has to enqueue work.
 *
 * @param _HASH_BITS  log2 of the number of 32-bit slots in the device hash
 *                    table (the table is allocated with 1 << _HASH_BITS slots).
 * @param _device_num index of the device to use, as understood by
 *                    OpenCLMain::getDevice().
 */
OpenCLMomentumV3::OpenCLMomentumV3(int _HASH_BITS, int _device_num) {
    max_threads = 1<<30; // very big; clamped to the device limit below
    HASH_BITS = _HASH_BITS;
    device_num = _device_num;

    OpenCLMain& main = OpenCLMain::getInstance();

    // checks if device exists
    // NOTE(review): with NDEBUG the assert is compiled out and execution
    // continues with an invalid device index — confirm whether a hard failure
    // is wanted here.
    if (main.getNumDevices() <= device_num) {
        printf("ERROR: DEVICE %d does not exist. Please limit your threads to one per device.\n", device_num);
        assert(false);
    }

    // compiles
    fprintf(stdout, "Starting OpenCLMomentum V3\n");
    fprintf(stdout, "Device %02d: %s\n", device_num, main.getDevice(device_num)->getName().c_str());
    cl_ulong maxWorkGroupSize = main.getDevice(device_num)->getMaxWorkGroupSize();
    // cl_ulong is not guaranteed to be `unsigned long long`; cast so the
    // argument always matches the %llu conversion.
    fprintf(stdout, "Max work group size: %llu\n", static_cast<unsigned long long>(maxWorkGroupSize));
    if (maxWorkGroupSize < max_threads) max_threads = maxWorkGroupSize;

    OpenCLContext *context = main.getDevice(device_num)->getContext();
    std::vector<std::string> program_filenames;
    program_filenames.push_back("opencl/opencl_cryptsha512.h");
    program_filenames.push_back("opencl/cryptsha512_kernel.cl");
    program_filenames.push_back("opencl/OpenCLMomentumV3.cl");
    OpenCLProgram *program = context->loadProgramFromFiles(program_filenames);

    // pre-fetch the kernels so their arguments can be bound once, here
    OpenCLKernel *kernel = program->getKernel("kernel_sha512");
    OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");

    // only one queue, helps with memory leaking
    queue = context->createCommandQueue(main.getDevice(device_num));

    // allocate internal structure
    // The slot count is computed in unsigned/size_t arithmetic so a large
    // HASH_BITS cannot overflow a signed int before being widened.
    const size_t ht_slots = static_cast<size_t>(1) << HASH_BITS;
    cl_message = context->createBuffer(sizeof(uint8_t)*32, CL_MEM_READ_ONLY, NULL);
    internal_hash_table = context->createBuffer(sizeof(uint32_t)*ht_slots, CL_MEM_READ_WRITE, NULL);
    temp_collisions = context->createBuffer(sizeof(collision_struct)*getCollisionCeiling(), CL_MEM_WRITE_ONLY, NULL);
    temp_collisions_count = context->createBuffer(sizeof(size_t), CL_MEM_READ_WRITE, NULL);

    // sets args: the cleanup kernel only takes the hash table
    kernel_cleanup->resetArgs();
    kernel_cleanup->addGlobalArg(internal_hash_table);

    // sets args: message, hash table, table size, collision output, collision counter
    kernel->resetArgs();
    kernel->addGlobalArg(cl_message);
    kernel->addGlobalArg(internal_hash_table);
    uint32_t ht_size = static_cast<uint32_t>(1) << HASH_BITS;
    kernel->addScalarUInt(ht_size);
    kernel->addGlobalArg(temp_collisions);
    kernel->addGlobalArg(temp_collisions_count);
}
示例2: OpenCLMomentumV9::find_collisions
void OpenCLMomentumV9::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
// temp storage
*collision_count = 0;
uint32_t ht_size = 1<<HASH_BITS;
SHA512_Context c512_avxsse;
SHA512_Init(&c512_avxsse);
uint8_t midhash[32+4];
memcpy(midhash+4, message, 32);
*((uint32_t*)midhash) = 0;
SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
SHA512_PreFinal(&c512_avxsse);
*(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
for (int i = 1; i < 5; i++) {
swap_helper[i] = SWAP64(swap_helper[i]);
}
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel = program->getKernel("kernel_sha512");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
assert(kernel != NULL);
//size_t BLOCKSIZE = main.getPlatform(0)->getDevice(0)->getMaxWorkGroupSize();
size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
//has to be a power of 2
BLOCKSIZE = 1<<log2(BLOCKSIZE);
size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);
// printf("BLOCKSIZE = %ld\n", BLOCKSIZE);
// printf("BLOCKSIZE_CLEAN = %ld\n", BLOCKSIZE_CLEAN);
// cleans up the hash table
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);
queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);
queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());
queue->finish();
}
示例3: OpenCLMomentumV3::find_collisions
void OpenCLMomentumV3::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
// temp storage
*collision_count = 0;
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel = program->getKernel("kernel_sha512");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
assert(kernel != NULL);
//size_t BLOCKSIZE = main.getPlatform(0)->getDevice(0)->getMaxWorkGroupSize();
size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
//has to be a power of 2
BLOCKSIZE = 1<<log2(BLOCKSIZE);
size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);
// printf("BLOCKSIZE = %ld\n", BLOCKSIZE);
// printf("BLOCKSIZE_CLEAN = %ld\n", BLOCKSIZE_CLEAN);
// cleans up the hash table
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);
queue->enqueueWriteBuffer(cl_message, message, sizeof(uint8_t)*32);
queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);
queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());
queue->finish();
}
示例4: OpenCLMomentumV8::find_collisions
// Runs the V8 collision search on the device for the given message.
//
// Visible steps: prepare the SHA-512 block on the host, clean the device hash
// table, then enqueue three kernels in order ("calculate_all_hashes",
// "fill_table", "find_collisions") and read back the collision counter and
// the collision buffer.
//
//   message   - input; exactly 32 bytes are read from it.
//   out_buff  - output; getCollisionCeiling() collision_struct entries are
//               read back into it.
//   out_count - output; reset to 0 here, uploaded as the initial device
//               counter and read back after the find_collisions kernel.
//
// NOTE(review): the remainder of this function is not visible in this
// excerpt, so anything after the final read-back is undocumented here.
void OpenCLMomentumV8::find_collisions(uint8_t* message, collision_struct* out_buff, size_t* out_count) {
// temp storage
*out_count = 0;
// NOTE(review): ht_size is not used anywhere in the visible part of this function.
uint32_t ht_size = 1<<HASH_BITS;
// Host-side pre-hash: a 36-byte block made of 4 zero bytes followed by the
// 32-byte message is fed to SHA-512, then SHA512_PreFinal is applied.
SHA512_Context c512_avxsse;
SHA512_Init(&c512_avxsse);
uint8_t midhash[32+4];
memcpy(midhash+4, message, 32);
// zero the leading 4 bytes of midhash
*((uint32_t*)midhash) = 0;
SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
SHA512_PreFinal(&c512_avxsse);
// Clear the first 32-bit word of the block buffer, then byte-swap 64-bit
// words 1..4 — presumably the layout the device kernels expect; confirm
// against the .cl source.
*(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
for (int i = 1; i < 5; i++) {
swap_helper[i] = SWAP64(swap_helper[i]);
}
// Look up the kernels from program 0 of this device's context.
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel_calculate_all_hashes = program->getKernel("calculate_all_hashes");
OpenCLKernel *kernel_fill_table = program->getKernel("fill_table");
OpenCLKernel *kernel_find_collisions = program->getKernel("find_collisions");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
OpenCLDevice * device = OpenCLMain::getInstance().getDevice(device_num);
// cleans up the hash table
// Each work-group size below is passed through 1<<log2(...); assuming log2
// is an integer floor-log2 helper, this rounds down to a power of two.
size_t kc_wgsize = kernel_cleanup->getWorkGroupSize(device);
kc_wgsize = 1<<log2(kc_wgsize);
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, kc_wgsize);
// printf("Cleaning the HT\n");
// queue->finish();
// upload the prepared SHA-512 block
queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
// step 1, calculate hashes
// Global size is MAX_MOMENTUM_NONCE/8 — presumably because each SHA-512
// invocation yields eight 64-bit values (cf. out[8] in the disabled check below).
size_t kcah_wgsize = kernel_calculate_all_hashes->getWorkGroupSize(device);
kcah_wgsize = 1<<log2(kcah_wgsize);
queue->enqueueKernel1D(kernel_calculate_all_hashes, MAX_MOMENTUM_NONCE/8,
kcah_wgsize);
// Disabled debugging aid: reads the hashes back and compares them with a
// host-side sph_sha512 computation.
// uint64_t * apa = new uint64_t[MAX_MOMENTUM_NONCE];
// queue->enqueueReadBuffer(hashes, apa, sizeof(uint64_t)*MAX_MOMENTUM_NONCE);
// queue->finish();
//
// printf("testing hashes\n");
// uint64_t count = 0;
// for (int i = 0; i < MAX_MOMENTUM_NONCE; i++) {
// if (apa[i] == 0) {
// count++;
// printf("BAD HASH AT: %d %X\n", i, apa[i]);
// }
// }
// printf("counted %X bad hashes\n", count);
// printf("NOW REALLY TEST THEM hashes\n");
// count = 0;
// for (uint32_t i = 0; i < MAX_MOMENTUM_NONCE/8; i+=8) {
// sph_sha512_context c512_sph; //SPH
// sph_sha512_init(&c512_sph);
// sph_sha512(&c512_sph, &i, 4);
// sph_sha512(&c512_sph, message, 32);
// uint64_t out[8];
// sph_sha512_close(&c512_sph, out);
// for (int j =0; j < 8; j++) {
// if (apa[i+j] != out[j]) {
// count++;
// uint64_t xxx = apa[i+j];
// printf("BAD HASH AT: %d => %X != %X\n", i, apa[i+j], out[j]);
// }
// }
// }
// printf("counted %X bad hashes\n", count);
// step 2, populate hashtable
size_t kft_wgsize = kernel_fill_table->getWorkGroupSize(device);
kft_wgsize = 1<<log2(kft_wgsize);
queue->enqueueKernel1D(kernel_fill_table, MAX_MOMENTUM_NONCE,
kft_wgsize);
// printf("step 2, populate hashtable\n");
// queue->finish();
// upload the zeroed collision counter before the search kernel runs
queue->enqueueWriteBuffer(collisions_count, out_count, sizeof(size_t));
// step 3, find collisions
size_t kfc_wgsize = kernel_find_collisions->getWorkGroupSize(device);
kfc_wgsize = 1<<log2(kfc_wgsize);
queue->enqueueKernel1D(kernel_find_collisions, MAX_MOMENTUM_NONCE,
kfc_wgsize);
// printf("step 3, find collisions\n");
// queue->finish();
// read back the counter and the full collision buffer
queue->enqueueReadBuffer(collisions_count, out_count, sizeof(size_t));
queue->enqueueReadBuffer(collisions, out_buff, sizeof(collision_struct)*getCollisionCeiling());
// printf("step 4, copy output\n");
//.........这里部分代码省略.........
示例5: printf
//.........这里部分代码省略.........
if (target_mem > 0) {
// Convert target to bytes, subtract 1 to guarantee results LESS THAN target
uint32 target_mem_temp = (target_mem * 1024 * 1024);
// Lazy calculation, assume large bucket_size, scale back from there
bucket_size = 1024;
while (bucket_size > 0 && calc_total_mem_usage(buckets_log2, bucket_size) > target_mem_temp) { bucket_size--; }
// Make sure the parameter configuration is sane:
if (bucket_size < 1) {
printf("ERROR: Memory target of %d MB cannot be attained with 2^%d buckets!\n", target_mem, buckets_log2);
printf(" Please lower the value of \"-b\" or increase the value of \"-m\".\n");
exit(0);
}
}
// Make sure we can allocate hash_list (cannot violate CL_DEVICE_MAX_MEM_ALLOC_SIZE)
cl_ulong required_mem = calc_hash_mem_usage(buckets_log2, bucket_size);
cl_ulong available_mem = device->getMaxMemAllocSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot allocate 2^%d buckets of %d elements!\n", device_num, buckets_log2, bucket_size);
printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or \"-s\" or increase the value of \"-m\".\n");
exit(0);
}
// Make sure we can allocate nonce_map (cannot violate CL_DEVICE_MAX_MEM_ALLOC_SIZE)
required_mem = calc_index_mem_usage(buckets_log2, bucket_size);
available_mem = device->getMaxMemAllocSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot allocate index of 2^%d elements!\n", device_num, buckets_log2);
printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or increase the value of \"-m\".\n");
exit(0);
}
// Make sure the whole thing fits in memory
required_mem = calc_total_mem_usage(buckets_log2, bucket_size);
available_mem = device->getGlobalMemSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot store 2^%d buckets of %d elements!\n", device_num, buckets_log2, bucket_size);
printf(" CL_DEVICE_GLOBAL_MEM_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or \"-s\" or increase the value of \"-m\".\n");
exit(0);
}
// All clear, show the running parameters!
printf("Using %d work group size\n", wgs);
printf("Using vector size %d\n", vect_type);
printf("Using 2^%d buckets\n", buckets_log2);
printf("Using %d elements per bucket\n", bucket_size);
printf("Using %d MB of memory\n", required_mem / 1024 / 1024);
printf("Estimated drop percentage: %5.2f%%\n", 100 * poisson_estimate((1 << buckets_log2), MAX_MOMENTUM_NONCE, bucket_size));
printf("\n");
// Compile the OpenCL code
printf("Compiling OpenCL code... this may take 3-5 minutes\n");
bool isGPU = device->isGPU();
if (!isGPU) { gpu_watchdog_max_wait *= 6; } // Effectively disable the watchdog
std::stringstream params;
params << " -I ./opencl/";
params << " -D DEVICE_GPU=" << (isGPU ? 1 : 0);
params << " -D VECT_TYPE=" << vect_type;
params << " -D LOCAL_WGS=" << wgs;
params << " -D NUM_BUCKETS_LOG2=" << buckets_log2;
params << " -D BUCKET_SIZE=" << bucket_size;
#ifdef USE_SOURCE
std::vector<std::string> file_list;
file_list.push_back("opencl/momentum.cl");
OpenCLProgram* program = device->getContext()->loadProgramFromFiles(file_list, params.str());
#else
std::vector<std::string> input_src;
input_src.push_back(getMomentumOpenCL());
OpenCLProgram* program = device->getContext()->loadProgramFromStrings(input_src, params.str());
#endif
kernel_hash = program->getKernel("hash_step");
kernel_reset = program->getKernel("reset_and_seek");
mid_hash = device->getContext()->createBuffer(32 * sizeof(cl_uint), CL_MEM_READ_ONLY, NULL);
hash_list = device->getContext()->createBuffer(calc_hash_mem_usage(buckets_log2, bucket_size), CL_MEM_READ_WRITE, NULL);
index_list = device->getContext()->createBuffer(calc_index_mem_usage(buckets_log2, bucket_size), CL_MEM_READ_WRITE, NULL);
nonce_a = device->getContext()->createBuffer(256 * sizeof(cl_uint), CL_MEM_WRITE_ONLY, NULL);
nonce_b = device->getContext()->createBuffer(256 * sizeof(cl_uint), CL_MEM_WRITE_ONLY, NULL);
nonce_qty = device->getContext()->createBuffer(sizeof(cl_uint), CL_MEM_READ_WRITE, NULL);
q = device->getContext()->createCommandQueue(device);
}