本文整理汇总了C++中OpenCLProgram类的典型用法代码示例。如果您正苦于以下问题：C++ OpenCLProgram类的具体用法？C++ OpenCLProgram怎么用？C++ OpenCLProgram使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OpenCLProgram类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: printf
/**
 * Sets up the V3 solver on one OpenCL device: validates the device index,
 * loads the three program sources, creates the command queue and the device
 * buffers, and binds the kernel arguments once.
 *
 * @param _HASH_BITS   log2 of the number of 32-bit hash-table slots.
 * @param _device_num  index of the OpenCL device to run on.
 */
OpenCLMomentumV3::OpenCLMomentumV3(int _HASH_BITS, int _device_num) {
    max_threads = 1<<30; // very big; clamped to the device limit below
    HASH_BITS = _HASH_BITS;
    device_num = _device_num;

    OpenCLMain& main = OpenCLMain::getInstance();

    // The index must address an existing device; negative values are rejected too.
    if (_device_num < 0 || main.getNumDevices() <= device_num) {
        printf("ERROR: DEVICE %d does not exist. Please limit your threads to one per device.\n", device_num);
        // NOTE(review): assert() is compiled out under NDEBUG, in which case
        // execution continues with an invalid device index.
        assert(false);
    }

    fprintf(stdout, "Starting OpenCLMomentum V3\n");
    fprintf(stdout, "Device %02d: %s\n", device_num, main.getDevice(device_num)->getName().c_str());

    cl_ulong maxWorkGroupSize = main.getDevice(device_num)->getMaxWorkGroupSize();
    // cl_ulong is not `unsigned long long` on every platform, so cast to match %llu.
    fprintf(stdout, "Max work group size: %llu\n", static_cast<unsigned long long>(maxWorkGroupSize));
    if (maxWorkGroupSize < max_threads) max_threads = maxWorkGroupSize;

    // Build the program from its sources.
    OpenCLContext *context = main.getDevice(device_num)->getContext();
    std::vector<std::string> program_filenames;
    program_filenames.push_back("opencl/opencl_cryptsha512.h");
    program_filenames.push_back("opencl/cryptsha512_kernel.cl");
    program_filenames.push_back("opencl/OpenCLMomentumV3.cl");
    OpenCLProgram *program = context->loadProgramFromFiles(program_filenames);

    // Look the kernels up once so their arguments can be bound here.
    OpenCLKernel *kernel = program->getKernel("kernel_sha512");
    OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
    assert(kernel != NULL);
    assert(kernel_cleanup != NULL);

    // only one queue, helps with memory leaking
    queue = context->createCommandQueue(main.getDevice(device_num));

    // Device buffers. The slot count is computed in size_t so the shift is not
    // performed in (possibly overflowing) int arithmetic.
    const size_t ht_slots = static_cast<size_t>(1) << HASH_BITS;
    cl_message = context->createBuffer(sizeof(uint8_t)*32, CL_MEM_READ_ONLY, NULL);
    internal_hash_table = context->createBuffer(sizeof(uint32_t)*ht_slots, CL_MEM_READ_WRITE, NULL);
    temp_collisions = context->createBuffer(sizeof(collision_struct)*getCollisionCeiling(), CL_MEM_WRITE_ONLY, NULL);
    temp_collisions_count = context->createBuffer(sizeof(size_t), CL_MEM_READ_WRITE, NULL);

    // Bind kernel arguments.
    kernel_cleanup->resetArgs();
    kernel_cleanup->addGlobalArg(internal_hash_table);

    kernel->resetArgs();
    kernel->addGlobalArg(cl_message);
    kernel->addGlobalArg(internal_hash_table);
    uint32_t ht_size = static_cast<uint32_t>(1) << HASH_BITS;
    kernel->addScalarUInt(ht_size);
    kernel->addGlobalArg(temp_collisions);
    kernel->addGlobalArg(temp_collisions_count);
}
示例2: SHA512_Init
/**
 * Runs one collision search on the device.
 *
 * Pre-hashes the message on the host, uploads the prepared SHA-512 block,
 * clears the device hash table, launches the hashing kernel and reads the
 * collision count and the collision buffer back.
 *
 * @param message          32-byte input; the first 32 bytes are read.
 * @param collisions       output array; getCollisionCeiling() entries are written.
 * @param collision_count  output; reset to 0 here, then overwritten from the device.
 */
void OpenCLMomentumV9::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
    // This value is also uploaded as the initial device-side counter below.
    *collision_count = 0;

    SHA512_Context c512_avxsse;
    SHA512_Init(&c512_avxsse);

    // Host-side block: 4 zero bytes followed by the 32-byte message.
    // memset replaces a uint32_t store through a cast of the byte array, which
    // was neither alignment- nor aliasing-safe.
    uint8_t midhash[32+4];
    memset(midhash, 0, 4);
    memcpy(midhash+4, message, 32);
    SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
    SHA512_PreFinal(&c512_avxsse);

    // Clear the first 32 bits of the context buffer and apply SWAP64 to
    // 64-bit words 1..4 before the block is sent to the device.
    *(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
    uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
    for (int i = 1; i < 5; i++) {
        swap_helper[i] = SWAP64(swap_helper[i]);
    }

    OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
    OpenCLProgram *program = context->getProgram(0);
    OpenCLKernel *kernel = program->getKernel("kernel_sha512");
    OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
    assert(kernel != NULL);
    assert(kernel_cleanup != NULL);

    // Work-group sizes have to be powers of 2.
    size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
    BLOCKSIZE = 1<<log2(BLOCKSIZE);
    size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
    BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);

    // cleans up the hash table
    queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);

    // Upload the inputs, run the search, fetch the results.
    queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
    queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));
    queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);
    queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
    queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());
    queue->finish();
}
示例3: load_kernels
virtual bool load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
path_trace_program.add_kernel(ustring("path_trace"));
programs.push_back(&path_trace_program);
return true;
}
示例4: load_kernels
/* Creates the data-init and state-buffer-size programs, registers one kernel
 * in each, appends both to `programs`, and then returns the result of
 * split_kernel->load_kernels().
 *
 * When OpenCLInfo::use_single_program() is true both programs use the name
 * "split" and the file "kernel_split.cl"; otherwise each gets its own name
 * and source file. */
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
                          vector<OpenCLDeviceBase::OpenCLProgram*> &programs)
{
    const bool use_single = OpenCLInfo::use_single_program();

    /* Data initialization program. */
    const char *data_init_name = use_single ? "split" : "split_data_init";
    const char *data_init_file = use_single ? "kernel_split.cl" : "kernel_data_init.cl";
    program_data_init = OpenCLDeviceBase::OpenCLProgram(
            this,
            data_init_name,
            data_init_file,
            get_build_options(this, requested_features));
    program_data_init.add_kernel(ustring("path_trace_data_init"));
    programs.push_back(&program_data_init);

    /* State buffer size program. */
    const char *state_size_name = use_single ? "split" : "split_state_buffer_size";
    const char *state_size_file = use_single ? "kernel_split.cl" : "kernel_state_buffer_size.cl";
    program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(
            this,
            state_size_name,
            state_size_file,
            get_build_options(this, requested_features));
    program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size"));
    programs.push_back(&program_state_buffer_size);

    return split_kernel->load_kernels(requested_features);
}
示例5: sizeof
void OpenCLMomentumV3::find_collisions(uint8_t* message, collision_struct* collisions, size_t* collision_count) {
// temp storage
*collision_count = 0;
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel = program->getKernel("kernel_sha512");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
assert(kernel != NULL);
//size_t BLOCKSIZE = main.getPlatform(0)->getDevice(0)->getMaxWorkGroupSize();
size_t BLOCKSIZE = kernel->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
//has to be a power of 2
BLOCKSIZE = 1<<log2(BLOCKSIZE);
size_t BLOCKSIZE_CLEAN = kernel_cleanup->getWorkGroupSize(OpenCLMain::getInstance().getDevice(device_num));
BLOCKSIZE_CLEAN = 1<<log2(BLOCKSIZE_CLEAN);
// printf("BLOCKSIZE = %ld\n", BLOCKSIZE);
// printf("BLOCKSIZE_CLEAN = %ld\n", BLOCKSIZE_CLEAN);
// cleans up the hash table
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, BLOCKSIZE_CLEAN);
queue->enqueueWriteBuffer(cl_message, message, sizeof(uint8_t)*32);
queue->enqueueWriteBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueKernel1D(kernel, MAX_MOMENTUM_NONCE/8, BLOCKSIZE);
queue->enqueueReadBuffer(temp_collisions_count, collision_count, sizeof(size_t));
queue->enqueueReadBuffer(temp_collisions, collisions, sizeof(collision_struct)*getCollisionCeiling());
queue->finish();
}
示例6: SHA512_Init
// Runs the V8 collision search in separate device passes: clean the hash
// table, calculate all hashes, fill the table, then find collisions.
//
// message   - input; the first 32 bytes are copied into the pre-hash block.
// out_buff  - receives getCollisionCeiling() collision_struct entries.
// out_count - reset to 0 here, uploaded as the initial device counter and
//             then read back.
//
// NOTE(review): this listing is truncated after the read-back calls; anything
// after that point (e.g. waiting on the queue) is not visible here.
void OpenCLMomentumV8::find_collisions(uint8_t* message, collision_struct* out_buff, size_t* out_count) {
// temp storage
*out_count = 0;
// NOTE(review): ht_size is not used in the visible part of this function.
uint32_t ht_size = 1<<HASH_BITS;
SHA512_Context c512_avxsse;
SHA512_Init(&c512_avxsse);
// Host-side block: 4 zero bytes followed by the 32-byte message.
uint8_t midhash[32+4];
memcpy(midhash+4, message, 32);
// NOTE(review): stores a uint32_t through a cast of a uint8_t array; the
// array carries no 4-byte alignment guarantee.
*((uint32_t*)midhash) = 0;
SHA512_Update_Simple(&c512_avxsse, midhash, 32+4);
SHA512_PreFinal(&c512_avxsse);
// Zero the first 32 bits of the context buffer, then apply SWAP64 to 64-bit
// words 1..4 of that buffer before it is uploaded.
*(uint32_t *)(&c512_avxsse.buffer.bytes[0]) = 0;
uint64_t * swap_helper = (uint64_t*)(&c512_avxsse.buffer.bytes[0]);
for (int i = 1; i < 5; i++) {
swap_helper[i] = SWAP64(swap_helper[i]);
}
OpenCLContext *context = OpenCLMain::getInstance().getDevice(device_num)->getContext();
OpenCLProgram *program = context->getProgram(0);
OpenCLKernel *kernel_calculate_all_hashes = program->getKernel("calculate_all_hashes");
OpenCLKernel *kernel_fill_table = program->getKernel("fill_table");
OpenCLKernel *kernel_find_collisions = program->getKernel("find_collisions");
OpenCLKernel *kernel_cleanup = program->getKernel("kernel_clean_hash_table");
OpenCLDevice * device = OpenCLMain::getInstance().getDevice(device_num);
// cleans up the hash table
// Each work-group size below is passed through 1<<log2(...); presumably this
// rounds it down to a power of two (log2 must be an integer helper here).
size_t kc_wgsize = kernel_cleanup->getWorkGroupSize(device);
kc_wgsize = 1<<log2(kc_wgsize);
queue->enqueueKernel1D(kernel_cleanup, 1<<HASH_BITS, kc_wgsize);
// printf("Cleaning the HT\n");
// queue->finish();
// Upload the prepared SHA-512 block (SHA512_BLOCK_SIZE bytes) to the device.
queue->enqueueWriteBuffer(cl_message, c512_avxsse.buffer.bytes, sizeof(uint8_t)*SHA512_BLOCK_SIZE);
// step 1, calculate hashes
size_t kcah_wgsize = kernel_calculate_all_hashes->getWorkGroupSize(device);
kcah_wgsize = 1<<log2(kcah_wgsize);
queue->enqueueKernel1D(kernel_calculate_all_hashes, MAX_MOMENTUM_NONCE/8,
kcah_wgsize);
// Disabled debugging aid: reads the hashes back and compares them against a
// host-side SHA-512 computation.
// uint64_t * apa = new uint64_t[MAX_MOMENTUM_NONCE];
// queue->enqueueReadBuffer(hashes, apa, sizeof(uint64_t)*MAX_MOMENTUM_NONCE);
// queue->finish();
//
// printf("testing hashes\n");
// uint64_t count = 0;
// for (int i = 0; i < MAX_MOMENTUM_NONCE; i++) {
// if (apa[i] == 0) {
// count++;
// printf("BAD HASH AT: %d %X\n", i, apa[i]);
// }
// }
// printf("counted %X bad hashes\n", count);
// printf("NOW REALLY TEST THEM hashes\n");
// count = 0;
// for (uint32_t i = 0; i < MAX_MOMENTUM_NONCE/8; i+=8) {
// sph_sha512_context c512_sph; //SPH
// sph_sha512_init(&c512_sph);
// sph_sha512(&c512_sph, &i, 4);
// sph_sha512(&c512_sph, message, 32);
// uint64_t out[8];
// sph_sha512_close(&c512_sph, out);
// for (int j =0; j < 8; j++) {
// if (apa[i+j] != out[j]) {
// count++;
// uint64_t xxx = apa[i+j];
// printf("BAD HASH AT: %d => %X != %X\n", i, apa[i+j], out[j]);
// }
// }
// }
// printf("counted %X bad hashes\n", count);
// step 2, populate hashtable
size_t kft_wgsize = kernel_fill_table->getWorkGroupSize(device);
kft_wgsize = 1<<log2(kft_wgsize);
queue->enqueueKernel1D(kernel_fill_table, MAX_MOMENTUM_NONCE,
kft_wgsize);
// printf("step 2, populate hashtable\n");
// queue->finish();
// Seed the device-side collision counter with the zero stored in *out_count.
queue->enqueueWriteBuffer(collisions_count, out_count, sizeof(size_t));
// step 3, find collisions
size_t kfc_wgsize = kernel_find_collisions->getWorkGroupSize(device);
kfc_wgsize = 1<<log2(kfc_wgsize);
queue->enqueueKernel1D(kernel_find_collisions, MAX_MOMENTUM_NONCE,
kfc_wgsize);
// printf("step 3, find collisions\n");
// queue->finish();
// Read back the collision count and the full collision buffer.
queue->enqueueReadBuffer(collisions_count, out_count, sizeof(size_t));
queue->enqueueReadBuffer(collisions, out_buff, sizeof(collision_struct)*getCollisionCeiling());
// printf("step 4, copy output\n");
//.........这里部分代码省略.........
示例7: main
/// Entry point of the Mandelbrot sample: initializes the OpenCL runtime,
/// parses the Mandelbrot source, prints its AST and builds an OpenCL program
/// from it. Returns -1 if the runtime cannot be initialized.
///
/// Everything after `#if 0` is an OpenGL variant of the same sample that the
/// preprocessor skips. NOTE(review): this listing is truncated inside that
/// disabled section, so the matching #endif and the end of main() are not
/// visible here.
int main()
{
if(OpenCLRuntime::Initialize() != SICKL_SUCCESS)
{
printf("Could not OpenCL Context\n");
return -1;
}
Mandelbrot mbrot;
mbrot.Parse();
/// Prints the AST generated from the Mandelbrot source
mbrot.GetRoot().Print();
/// Build the OpenCL program from the parsed source
OpenCLProgram program;
OpenCLCompiler::Build(mbrot, program);
#if 0
// init GLEW/GLUT and other gl setup
if(!OpenGLRuntime::Initialize())
{
printf("Could not create OpenGL Context\n");
return -1;
}
OpenGLCompiler comp;
Mandelbrot mbrot;
mbrot.Parse();
/// Prints the AST generated from the Mandelbrot source
mbrot.GetRoot().Print();
/// Compile our OpenGL program
OpenGLProgram* program = comp.Build(mbrot);
/// Print the generated GLSL source
printf("%s\n", program->GetSource().c_str());
const uint32_t width = 350 * 5;
const uint32_t height = 200 * 5;
const uint32_t colors = mbrot.max_iterations;
/// Generate the color table (a nice gold)
float* color_map_data = new float[3 * colors];
for(uint32_t i = 0; i < colors; i++)
{
float x = i/(float)colors;
color_map_data[3 * i + 0] = 191.0f / 255.0f * (1.0f - x);
color_map_data[3 * i + 1] = 125.0f / 255.0f * (1.0f - x);
color_map_data[3 * i + 2] = 37.0f / 255.0f * (1.0f - x);
}
/// Put it into a 1D buffer
OpenGLBuffer1D color_map(colors, ReturnType::Float3, color_map_data);
/// Our output buffer, plus a second buffer used to demonstrate copying
OpenGLBuffer2D result(width, height, ReturnType::Float3, nullptr);
OpenGLBuffer2D copy(width, height, ReturnType::Float3, nullptr);
/// Initialize our program
program->Initialize(width, height);
/// Get our binding locations for each of the program inputs and outputs
input_t min_loc = program->GetInputHandle("min");
input_t max_loc = program->GetInputHandle("max");
input_t color_map_loc = program->GetInputHandle("color_map");
output_t output_loc = program->GetOutputHandle("output");
/// Sets min values
program->SetInput(min_loc, -2.5f, -1.0f);
/// Sets max values
program->SetInput(max_loc, 1.0f, 1.0f);
/// Set the color map
program->SetInput(color_map_loc, color_map);
/// Sets the render location
program->BindOutput(output_loc, result);
/// Runs the program
program->Run();
/// We can copy our data to the second buffer
copy.SetData(result);
float* result_buffer = nullptr;
/// We can either read result back from the texture
copy.GetData(result_buffer);
/// Or from the framebuffer (which is faster on nvidia hardware at least)
program->GetOutput(output_loc, result_buffer);
/// Finally, dump the image to a Bitmap to view
BMP image;
image.SetSize(width, height);
for(uint32_t i = 0; i < height; i++)
//.........这里部分代码省略.........
示例8: printf
//.........这里部分代码省略.........
// Tail of an initialization routine (its beginning is not part of this
// listing). It sizes the buckets against an optional memory target, validates
// the configuration against the device's allocation and global-memory limits,
// prints the running parameters, builds the OpenCL program and creates the
// kernels, buffers and command queue.
if (target_mem > 0) {
// Convert target to bytes, subtract 1 to guarantee results LESS THAN target
// NOTE(review): no subtraction is performed on the next line; the loop below
// instead shrinks bucket_size until usage is not greater than the target.
// NOTE(review): the product is stored in a uint32; if target_mem is in MB (as
// the error message below suggests), large targets could overflow - confirm.
uint32 target_mem_temp = (target_mem * 1024 * 1024);
// Lazy calculation, assume large bucket_size, scale back from there
bucket_size = 1024;
while (bucket_size > 0 && calc_total_mem_usage(buckets_log2, bucket_size) > target_mem_temp) { bucket_size--; }
// Make sure the parameter configuration is sane:
if (bucket_size < 1) {
printf("ERROR: Memory target of %d MB cannot be attained with 2^%d buckets!\n", target_mem, buckets_log2);
printf(" Please lower the value of \"-b\" or increase the value of \"-m\".\n");
// NOTE(review): this and the other error paths below terminate with exit status 0.
exit(0);
}
}
// Make sure we can allocate hash_list (cannot violate CL_DEVICE_MAX_MEM_ALLOC_SIZE)
cl_ulong required_mem = calc_hash_mem_usage(buckets_log2, bucket_size);
cl_ulong available_mem = device->getMaxMemAllocSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot allocate 2^%d buckets of %d elements!\n", device_num, buckets_log2, bucket_size);
// NOTE(review): available_mem and required_mem are cl_ulong but are printed
// with %d here and in the similar messages below (format/argument mismatch).
printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or \"-s\" or increase the value of \"-m\".\n");
exit(0);
}
// Make sure we can allocate nonce_map (cannot violate CL_DEVICE_MAX_MEM_ALLOC_SIZE)
required_mem = calc_index_mem_usage(buckets_log2, bucket_size);
available_mem = device->getMaxMemAllocSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot allocate index of 2^%d elements!\n", device_num, buckets_log2);
printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or increase the value of \"-m\".\n");
exit(0);
}
// Make sure the whole thing fits in memory
required_mem = calc_total_mem_usage(buckets_log2, bucket_size);
available_mem = device->getGlobalMemSize();
if (required_mem > available_mem) {
printf("ERROR: Device %d cannot store 2^%d buckets of %d elements!\n", device_num, buckets_log2, bucket_size);
printf(" CL_DEVICE_GLOBAL_MEM_SIZE is %d MB, this configuration requires %d MB\n", available_mem / 1024 / 1024, required_mem / 1024 / 1024);
printf(" Please lower the value of \"-b\" or \"-s\" or increase the value of \"-m\".\n");
exit(0);
}
// All clear, show the running parameters!
printf("Using %d work group size\n", wgs);
printf("Using vector size %d\n", vect_type);
printf("Using 2^%d buckets\n", buckets_log2);
printf("Using %d elements per bucket\n", bucket_size);
// required_mem still holds the total usage computed for the last check above.
printf("Using %d MB of memory\n", required_mem / 1024 / 1024);
printf("Estimated drop percentage: %5.2f%%\n", 100 * poisson_estimate((1 << buckets_log2), MAX_MOMENTUM_NONCE, bucket_size));
printf("\n");
// Compile the OpenCL code
printf("Compiling OpenCL code... this may take 3-5 minutes\n");
bool isGPU = device->isGPU();
if (!isGPU) { gpu_watchdog_max_wait *= 6; } // Effectively disable the watchdog
// Build options: include path plus the tuning parameters as -D defines.
std::stringstream params;
params << " -I ./opencl/";
params << " -D DEVICE_GPU=" << (isGPU ? 1 : 0);
params << " -D VECT_TYPE=" << vect_type;
params << " -D LOCAL_WGS=" << wgs;
params << " -D NUM_BUCKETS_LOG2=" << buckets_log2;
params << " -D BUCKET_SIZE=" << bucket_size;
// With USE_SOURCE the program is loaded from opencl/momentum.cl; otherwise it
// is built from the source string returned by getMomentumOpenCL().
#ifdef USE_SOURCE
std::vector<std::string> file_list;
file_list.push_back("opencl/momentum.cl");
OpenCLProgram* program = device->getContext()->loadProgramFromFiles(file_list, params.str());
#else
std::vector<std::string> input_src;
input_src.push_back(getMomentumOpenCL());
OpenCLProgram* program = device->getContext()->loadProgramFromStrings(input_src, params.str());
#endif
kernel_hash = program->getKernel("hash_step");
kernel_reset = program->getKernel("reset_and_seek");
// Device buffers; hash_list and index_list use the sizes validated above.
mid_hash = device->getContext()->createBuffer(32 * sizeof(cl_uint), CL_MEM_READ_ONLY, NULL);
hash_list = device->getContext()->createBuffer(calc_hash_mem_usage(buckets_log2, bucket_size), CL_MEM_READ_WRITE, NULL);
index_list = device->getContext()->createBuffer(calc_index_mem_usage(buckets_log2, bucket_size), CL_MEM_READ_WRITE, NULL);
nonce_a = device->getContext()->createBuffer(256 * sizeof(cl_uint), CL_MEM_WRITE_ONLY, NULL);
nonce_b = device->getContext()->createBuffer(256 * sizeof(cl_uint), CL_MEM_WRITE_ONLY, NULL);
nonce_qty = device->getContext()->createBuffer(sizeof(cl_uint), CL_MEM_READ_WRITE, NULL);
q = device->getContext()->createCommandQueue(device);
}
示例9:
/* Destructor: stops the task pool, releases the data-init program and
 * deletes the split_kernel object, in that order. */
~OpenCLDeviceSplitKernel()
{
/* Stop the task pool first, before anything is released. */
task_pool.stop();
/* Release kernels */
program_data_init.release();
/* split_kernel is deleted here, so this object is treated as its owner. */
delete split_kernel;
}
示例10:
/* Destructor: stops the task pool, then releases the path-trace program. */
~OpenCLDeviceMegaKernel()
{
/* Stop the task pool first, before the program is released. */
task_pool.stop();
path_trace_program.release();
}
示例11:
/* Destructor: stops the task pool, releases every split-kernel program,
 * releases the device memory objects, and finally frees the host-side
 * ray-state array. */
~OpenCLDeviceSplitKernel()
{
    task_pool.stop();

    /* Programs, one per split kernel stage. */
    program_data_init.release();
    program_scene_intersect.release();
    program_lamp_emission.release();
    program_queue_enqueue.release();
    program_background_buffer_update.release();
    program_shader_eval.release();
    program_holdout_emission_blurring_pathtermination_ao.release();
    program_direct_lighting.release();
    program_shadow_blocked.release();
    program_next_iteration_setup.release();
    program_sum_all_radiance.release();

    /* Per-ray state buffers. */
    release_mem_object_safe(rng_coop);
    release_mem_object_safe(throughput_coop);
    release_mem_object_safe(L_transparent_coop);
    release_mem_object_safe(PathRadiance_coop);
    release_mem_object_safe(Ray_coop);
    release_mem_object_safe(PathState_coop);
    release_mem_object_safe(Intersection_coop);
    release_mem_object_safe(kgbuffer);
    release_mem_object_safe(sd);
    release_mem_object_safe(sd_DL_shadow);
    release_mem_object_safe(ray_state);
    release_mem_object_safe(AOAlpha_coop);
    release_mem_object_safe(AOBSDF_coop);
    release_mem_object_safe(AOLightRay_coop);
    release_mem_object_safe(BSDFEval_coop);
    release_mem_object_safe(ISLamp_coop);
    release_mem_object_safe(LightRay_coop);
    release_mem_object_safe(Intersection_coop_shadow);
#ifdef WITH_CYCLES_DEBUG
    release_mem_object_safe(debugdata_coop);
#endif

    /* Queue and work-distribution buffers. */
    release_mem_object_safe(use_queues_flag);
    release_mem_object_safe(Queue_data);
    release_mem_object_safe(Queue_index);
    release_mem_object_safe(work_array);
#ifdef __WORK_STEALING__
    release_mem_object_safe(work_pool_wgs);
#endif
    release_mem_object_safe(per_sample_output_buffers);

    /* Host-side array; free() does nothing when given a null pointer. */
    free(hostRayStateArray);
}