C++ clBuildProgram函数代码示例

本文整理汇总了C++中clBuildProgram函数的典型用法代码示例。如果您正苦于以下问题：C++ clBuildProgram函数的具体用法？C++ clBuildProgram怎么用？C++ clBuildProgram使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了clBuildProgram函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: main


//.........这里部分代码省略.........
	    if(error != CL_SUCCESS) {
	        perror("Can't create a valid OpenCL context");
	        exit(1);
	    }

	    /* For each device, create a buffer and partition that data among the devices for compute! */
	    cl_mem inobj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
	                                  sizeof(cl_float8) * DATA_SIZE, ud_in, &error);
	    if(error != CL_SUCCESS) {
	        perror("Can't create a buffer");
	        exit(1);
	    }

        int offset = 0; 
        for(int i = 0; i < numOfDevices; ++i, ++offset ) {
	        /* Load the two source files into temporary datastores */
	        const char *file_names[] = {"vectorization.cl"}; 
	        const int NUMBER_OF_FILES = 1;
	        char* buffer[NUMBER_OF_FILES];
	        size_t sizes[NUMBER_OF_FILES];
	        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);
	
	        /* Create the OpenCL program object */
	        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
		    if(error != CL_SUCCESS) {
		      perror("Can't create the OpenCL program object");
		      exit(1);   
		    }

	        /* Build OpenCL program object and dump the error message, if any */
	        char *program_log;
	        size_t log_size;
            char* build_options = "-fbin-llvmir -fbin-amdil -fbin-exe";
	        error = clBuildProgram(program, 1, &devices[i], build_options, NULL, NULL);		
		    if(error != CL_SUCCESS) {
		      // If there's an error whilst building the program, dump the log
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
		      program_log = (char*) malloc(log_size+1);
		      program_log[log_size] = '\0';
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 
		            log_size+1, program_log, NULL);
		      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
		      free(program_log);
		      exit(1);
		    }
	  
	        /* Query the program as to how many kernels were detected */
	        cl_uint numOfKernels;
	        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
	        if (error != CL_SUCCESS) {
	            perror("Unable to retrieve kernel count from program");
	            exit(1);
	        }
	        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
	        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);

            /* Loop thru each kernel and execute on device */
	        for(cl_uint j = 0; j < numOfKernels; j++) {
	            char kernelName[32];
	            cl_uint argCnt;
	            clGetKernelInfo(kernels[j], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
	            clGetKernelInfo(kernels[j], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
	            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
	            printf("About to create command queue and enqueue this kernel...\n");
	
	            /* Create a command queue */

开发者ID:gujunli，项目名称:OpenCL-Benchmark，代码行数:67，代码来源:vectorization.c

示例2: main

// main() for simple buffer and sub-buffer example
//
int main(int argc, char** argv)
{
    cl_int errNum;
    cl_uint numPlatforms;
    cl_uint numDevices;
    cl_platform_id * platformIDs;
    cl_device_id * deviceIDs;
    cl_context context;
    cl_program program;
    std::vector<cl_kernel> kernels;
    std::vector<cl_command_queue> queues;
    std::vector<cl_mem> buffers;
    int * inputOutput;
    std::cout << "Simple buffer and sub-buffer Example" << std::endl;
    // First, select an OpenCL platform to run on.
    errNum = clGetPlatformIDs(0, NULL, &numPlatforms);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    platformIDs = (cl_platform_id *)alloca(sizeof(cl_platform_id) * numPlatforms);
    std::cout << "Number of platforms: \t" << numPlatforms << std::endl;
    errNum = clGetPlatformIDs(numPlatforms, platformIDs, NULL);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    std::ifstream srcFile("simple.cl");
    
    checkErr(srcFile.is_open() ? CL_SUCCESS : -1, "reading simple.cl");
    
    std::string srcProg(
                        std::istreambuf_iterator<char>(srcFile),
                        (std::istreambuf_iterator<char>()));
    const char * src = srcProg.c_str();
    size_t length = srcProg.length();
    deviceIDs = NULL;
    DisplayPlatformInfo(
                        platformIDs[PLATFORM_INDEX],
                        CL_PLATFORM_VENDOR,
                        "CL_PLATFORM_VENDOR");
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            0,
                            NULL,
                            &numDevices);
    if (errNum != CL_SUCCESS && errNum != CL_DEVICE_NOT_FOUND){
        checkErr(errNum, "clGetDeviceIDs");
    }
    
    deviceIDs = (cl_device_id *)alloca(
                                       sizeof(cl_device_id) * numDevices);
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            numDevices,
                            &deviceIDs[0],
                            NULL);
    checkErr(errNum, "clGetDeviceIDs");
    
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platformIDs[PLATFORM_INDEX],
        0
    };
    
    context = clCreateContext(
                              contextProperties,
                              numDevices,
                              deviceIDs,
                              NULL,
                              NULL,
                              &errNum);
    
    checkErr(errNum, "clCreateContext");
    // Create program from source
    program = clCreateProgramWithSource(
                                        context,
                                        1,
                                        &src,
                                        &length,
                                        &errNum);
    checkErr(errNum, "clCreateProgramWithSource");
    
    // Build program
    errNum = clBuildProgram(
                            program,
                            numDevices,
                            deviceIDs,
                            "-I.",
                            NULL,
                            NULL);

    if (errNum != CL_SUCCESS){
        // Determine the reason for the error
        char buildLog[16384];
//.........这里部分代码省略.........

开发者ID:BeauJoh，项目名称:Simple，代码行数:101，代码来源:simple.cpp

示例3: main

int main( int argc, char **argv )
{	
  struct pb_Parameters *params;
  params = pb_ReadParameters(&argc, argv);
  if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
    {
      fprintf(stderr, "Expecting one input filename\n");
      exit(-1);
    }

  int err = 0;
  if(argc != 3)
    err |= 1;
  else {
    char* numend;
    N = strtol(argv[1], &numend, 10);
    if(numend == argv[1])
      err |= 2;
    B = strtol(argv[2], &numend, 10);
    if(numend == argv[2])
      err |= 4;
  }

  if(err)
  {
    fprintf(stderr, "Expecting two integers for N and B\n");
    exit(-1);
  }

  //8*1024*1024;
  int n_bytes = N * B* sizeof(float2);
  int nthreads = T;
  
  struct pb_TimerSet timers;
  pb_InitializeTimerSet(&timers);
  pb_AddSubTimer(&timers, oclOverhead, pb_TimerID_KERNEL);
  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);

  float *shared_source =(float *)malloc(n_bytes);  
  float2 *source    = (float2 *)malloc( n_bytes );
  float2 *result    = (float2 *)calloc( N*B, sizeof(float2) );

  inputData(params->inpFiles[0],(float*)source,N*B*2);

  // OpenCL Code 
  cl_int clErrNum;
  pb_Context* pb_context;
  pb_context = pb_InitOpenCLContext(params);
  if (pb_context == NULL) {
    fprintf (stderr, "Error: No OpenCL platform/device can be found."); 
    return -1;
  }

  cl_int clStatus;
  cl_device_id clDevice = (cl_device_id) pb_context->clDeviceId;
  cl_platform_id clPlatform = (cl_platform_id) pb_context->clPlatformId;
  cl_context clContext = (cl_context) pb_context->clContext;
  cl_command_queue clCommandQueue;

  cl_program clProgram;

  cl_kernel fft_kernel;

  cl_mem d_source, d_work; //float2 *d_source, *d_work;
  cl_mem d_shared_source; //float *d_shared_source;

  clCommandQueue = clCreateCommandQueue(clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &clErrNum);
  OCL_ERRCK_VAR(clErrNum);
  
  pb_SetOpenCL(&clContext, &clCommandQueue);
  pb_SwitchToSubTimer(&timers, oclOverhead, pb_TimerID_KERNEL);

  const char *source_path = "src/opencl_nvidia/fft_kernel.cl";
  char *sourceCode;
  sourceCode = readFile(source_path);
  if (sourceCode == NULL) {
    fprintf(stderr, "Could not load program source of '%s'\n", source_path); exit(1);
  }

  clProgram = clCreateProgramWithSource(clContext, 1, (const char **)&sourceCode, NULL, &clErrNum);
  OCL_ERRCK_VAR(clErrNum);

  free(sourceCode);

  /*
    char compileOptions[1024];
  //                -cl-nv-verbose // Provides register info for NVIDIA devices
  // Set all Macros referenced by kernels
  sprintf(compileOptions, "\
                -D PRESCAN_THREADS=%u\
                -D KB=%u -D UNROLL=%u\
                -D BINS_PER_BLOCK=%u -D BLOCK_X=%u",

                prescanThreads,
                lmemKB, UNROLL,
                bins_per_block, blockX
            ); 
  */
  OCL_ERRCK_RETVAL ( clBuildProgram(clProgram, 1, &clDevice, NULL /*compileOptions*/, NULL, NULL) );

//.........这里部分代码省略.........

开发者ID:abduld，项目名称:Parboil，代码行数:101，代码来源:fft.cpp

示例4: InitOpenCL

void InitOpenCL()
{
    // 1. Get a platform.
    cl_platform_id platform;
    
    clGetPlatformIDs( 1, &platform, NULL );
    // 2. Find a gpu device.
    cl_device_id device;
    
    clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU,
                   1,
                   &device,
                   NULL);
    // 3. Create a context and command queue on that device.
    cl_context context = clCreateContext( NULL,
                                         1,
                                         &device,
                                         NULL, NULL, NULL);
    queue = clCreateCommandQueue( context,
                                 device,
                                 0, NULL );
    // 4. Perform runtime source compilation, and obtain kernel entry point.
    std::ifstream file("scene.cl");
    std::string source;
    if (file){
    while(!file.eof()){
        char line[256];
        file.getline(line,255);
        source += std::string(line) + "\n";
    }
    }
    if (source.length()==0)
    {
        std::string err = "fail to load shader";
    }
    
    cl_ulong maxSize;
    clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), &maxSize, 0);
    
    const char* str = source.c_str();
    cl_program program = clCreateProgramWithSource( context,
                                                   1,
                                                   &str,
                                                   NULL, NULL );
    cl_int result = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
    if ( result ){
        char* build_log;
        size_t log_size;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        build_log = new char[log_size+1];
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
        build_log[log_size] = '\0';
        if( log_size > 2 ) {
            std::cout << "build log: " << build_log << std::endl;
        }
        delete[] build_log;
        std::cout << "Error during compilation! (" << result << ")" << std::endl;
    }
    kernel = clCreateKernel( program, "tracekernel", NULL );
    // 5. Create a data buffer.
    buffer        = clCreateBuffer( context,
                                   CL_MEM_WRITE_ONLY,
                                   kWidth * kHeight *sizeof(cl_float4),
                                   NULL, 0 );
    viewTransform = clCreateBuffer( context,
                                   CL_MEM_READ_WRITE,
                                   16 *sizeof(cl_float),
                                   NULL, 0 );
    
    worldTransforms = clCreateBuffer( context,
                                     CL_MEM_READ_WRITE,
                                     16 *sizeof(cl_float)*2,
                                     NULL, 0 );
    
    clSetKernelArg(kernel, 0, sizeof(buffer), (void*) &buffer);
    clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &kWidth);
    clSetKernelArg(kernel, 2, sizeof(cl_uint), (void*) &kWidth);
    clSetKernelArg(kernel, 3, sizeof(viewTransform), (void*) &viewTransform);
    clSetKernelArg(kernel, 4, sizeof(worldTransforms), (void*) &worldTransforms);
}

开发者ID:autosquid，项目名称:CLRay，代码行数:80，代码来源:raytracer.cpp

示例5: main

	int main()
	{
	cl_int num_rand = 4096*256; /* The number of random numbers generated using one generator */
	int count_all, i, num_generator = sizeof(mts)/sizeof(mts[0]); /* The number of generators */
	double pi;
	cl_platform_id platform_id = NULL;
	cl_uint ret_num_platforms;
	cl_device_id device_id = NULL;
	cl_uint ret_num_devices;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_program program = NULL;
	cl_kernel kernel_mt = NULL, kernel_pi = NULL;
	size_t kernel_code_size;
	char *kernel_src_str;
	cl_uint *result;
	cl_int ret;
	FILE *fp;
	cl_mem rand, count;
	size_t global_item_size[3], local_item_size[3];
	cl_mem dev_mts;
	cl_event ev_mt_end, ev_pi_end, ev_copy_end;
	cl_ulong prof_start, prof_mt_end, prof_pi_end, prof_copy_end;
 
	clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
	clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id,
	&ret_num_devices);
	context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
	result = (cl_uint*)malloc(sizeof(cl_uint)*num_generator);
 
	command_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);
	fp = fopen("mt.cl", "r");
	kernel_src_str = (char*)malloc(MAX_SOURCE_SIZE);
	kernel_code_size = fread(kernel_src_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);
 
	/* Create output buffer */
	rand = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*num_rand*num_generator, NULL, &ret);
	count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint)*num_generator, NULL, &ret);
 
	/* Build Program*/
	program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str,
	(const size_t *)&kernel_code_size, &ret);
	clBuildProgram(program, 1, &device_id, "", NULL, NULL);
	kernel_mt = clCreateKernel(program, "genrand", &ret);
	kernel_pi = clCreateKernel(program, "calc_pi", &ret);
 
	/* Create input parameter */
	dev_mts = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(mts), NULL, &ret);
	clEnqueueWriteBuffer(command_queue, dev_mts, CL_TRUE, 0, sizeof(mts), mts, 0, NULL, NULL);
 
	/* Set Kernel Arguments */
	clSetKernelArg(kernel_mt, 0, sizeof(cl_mem), (void*)&rand); /* Random numbers (output of genrand) */
	clSetKernelArg(kernel_mt, 1, sizeof(cl_mem), (void*)&dev_mts); /* MT parameter (input to genrand) */
	clSetKernelArg(kernel_mt, 2, sizeof(num_rand), &num_rand); /* Number of random numbers to generate */
 
	clSetKernelArg(kernel_pi, 0, sizeof(cl_mem), (void*)&count); /* Counter for points within circle (output of calc_pi) */
	clSetKernelArg(kernel_pi, 1, sizeof(cl_mem), (void*)&rand); /* Random numbers (input to calc_pi) */
	clSetKernelArg(kernel_pi, 2, sizeof(num_rand), &num_rand); /* Number of random numbers used */
 
	global_item_size[0] = num_generator; global_item_size[1] = 1; global_item_size[2] = 1;
	local_item_size[0] = num_generator; local_item_size[1] = 1; local_item_size[2] = 1;
 
	/* Create a random number array */
	clEnqueueNDRangeKernel(command_queue, kernel_mt, 1, NULL, global_item_size, local_item_size, 0, NULL, &ev_mt_end);
 
	/* Compute PI */
	clEnqueueNDRangeKernel(command_queue, kernel_pi, 1, NULL, global_item_size, local_item_size, 0, NULL, &ev_pi_end);
 
	/* Get result */
	clEnqueueReadBuffer(command_queue, count, CL_TRUE, 0, sizeof(cl_uint)*num_generator, result, 0, NULL, &ev_copy_end);
 
	/* Average the values of PI */
	count_all = 0;
	for (i=0; i < num_generator; i++) {
	count_all += result[i];
	}
 
	pi = ((double)count_all)/(num_rand * num_generator) * 4;
	printf("pi = %f\n", pi);
 
	/* Get execution time info */
	clGetEventProfilingInfo(ev_mt_end, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &prof_start, NULL);
	clGetEventProfilingInfo(ev_mt_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_mt_end, NULL);
	clGetEventProfilingInfo(ev_pi_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_pi_end, NULL);
	clGetEventProfilingInfo(ev_copy_end, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &prof_copy_end, NULL);
 
	printf(" mt: %f[ms]\n"
		" pi: %f[ms]\n"
		" copy: %f[ms]\n",
		(prof_mt_end - prof_start)/(1000000.0),
		(prof_pi_end - prof_mt_end)/(1000000.0),
		(prof_copy_end - prof_pi_end)/(1000000.0));
 
	clReleaseEvent(ev_mt_end);
	clReleaseEvent(ev_pi_end);
	clReleaseEvent(ev_copy_end);
 
	clReleaseMemObject(rand);
	clReleaseMemObject(count);
//.........这里部分代码省略.........

开发者ID:manug2，项目名称:manug2-repo，代码行数:101，代码来源:mt.c

示例6: buildOpenCLKernels_flux_calc_kernely

void buildOpenCLKernels_flux_calc_kernely(int xdim0, int ydim0, int xdim1,
                                          int ydim1, int xdim2, int ydim2,
                                          int xdim3, int ydim3) {

  // int ocl_fma = OCL_FMA;
  if (!isbuilt_flux_calc_kernely) {
    buildOpenCLKernels();
    // clSafeCall( clUnloadCompiler() );
    cl_int ret;
    char *source_filename[1] = {(char *)"./OpenCL/flux_calc_kernely.cl"};

    // Load the kernel source code into the array source_str
    FILE *fid;
    char *source_str[1];
    size_t source_size[1];

    for (int i = 0; i < 1; i++) {
      fid = fopen(source_filename[i], "r");
      if (!fid) {
        fprintf(stderr, "Can't open the kernel source file!\n");
        exit(1);
      }

      source_str[i] = (char *)malloc(4 * 0x1000000);
      source_size[i] = fread(source_str[i], 1, 4 * 0x1000000, fid);
      if (source_size[i] != 4 * 0x1000000) {
        if (ferror(fid)) {
          printf("Error while reading kernel source file %s\n",
                 source_filename[i]);
          exit(-1);
        }
        if (feof(fid))
          printf("Kernel source file %s succesfuly read.\n",
                 source_filename[i]);
        // printf("%s\n",source_str[i]);
      }
      fclose(fid);
    }

    printf("Compiling flux_calc_kernely %d source -- start \n", OCL_FMA);

    // Create a program from the source
    OPS_opencl_core.program = clCreateProgramWithSource(
        OPS_opencl_core.context, 1, (const char **)&source_str,
        (const size_t *)&source_size, &ret);
    clSafeCall(ret);

    // Build the program
    char buildOpts[255 * 4];
    char *pPath = NULL;
    pPath = getenv("OPS_INSTALL_PATH");
    if (pPath != NULL)
      if (OCL_FMA)
        sprintf(buildOpts,
                "-cl-mad-enable -DOCL_FMA -I%s/c/include -DOPS_WARPSIZE=%d  "
                "-Dxdim0_flux_calc_kernely=%d  -Dydim0_flux_calc_kernely=%d  "
                "-Dxdim1_flux_calc_kernely=%d  -Dydim1_flux_calc_kernely=%d  "
                "-Dxdim2_flux_calc_kernely=%d  -Dydim2_flux_calc_kernely=%d  "
                "-Dxdim3_flux_calc_kernely=%d  -Dydim3_flux_calc_kernely=%d ",
                pPath, 32, xdim0, ydim0, xdim1, ydim1, xdim2, ydim2, xdim3,
                ydim3);
      else
        sprintf(buildOpts,
                "-cl-mad-enable -I%s/c/include -DOPS_WARPSIZE=%d  "
                "-Dxdim0_flux_calc_kernely=%d  -Dydim0_flux_calc_kernely=%d  "
                "-Dxdim1_flux_calc_kernely=%d  -Dydim1_flux_calc_kernely=%d  "
                "-Dxdim2_flux_calc_kernely=%d  -Dydim2_flux_calc_kernely=%d  "
                "-Dxdim3_flux_calc_kernely=%d  -Dydim3_flux_calc_kernely=%d ",
                pPath, 32, xdim0, ydim0, xdim1, ydim1, xdim2, ydim2, xdim3,
                ydim3);
    else {
      sprintf((char *)"Incorrect OPS_INSTALL_PATH %s\n", pPath);
      exit(EXIT_FAILURE);
    }

    ret = clBuildProgram(OPS_opencl_core.program, 1, &OPS_opencl_core.device_id,
                         buildOpts, NULL, NULL);

    if (ret != CL_SUCCESS) {
      char *build_log;
      size_t log_size;
      clSafeCall(clGetProgramBuildInfo(
          OPS_opencl_core.program, OPS_opencl_core.device_id,
          CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size));
      build_log = (char *)malloc(log_size + 1);
      clSafeCall(clGetProgramBuildInfo(
          OPS_opencl_core.program, OPS_opencl_core.device_id,
          CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL));
      build_log[log_size] = '\0';
      fprintf(
          stderr,
          "=============== OpenCL Program Build Info ================\n\n%s",
          build_log);
      fprintf(stderr,
              "\n========================================================= \n");
      free(build_log);
      exit(EXIT_FAILURE);
    }
    printf("compiling flux_calc_kernely -- done\n");

//.........这里部分代码省略.........

开发者ID:gihanmudalige，项目名称:OPS，代码行数:101，代码来源:flux_calc_kernely_opencl_kernel.cpp

示例7: main

int main(void) {
    const size_t ARRAY_BYTES = ARRAY_SIZE * sizeof(float);

    // Generate the input array on the host.
    float h_a[ARRAY_SIZE];
    float h_b[ARRAY_SIZE];
    for (int i = 0; i < ARRAY_SIZE; i++) {
        h_a[i] = (float)i;
        h_b[i] = (float)(2 * i);
    }

    float h_c[ARRAY_SIZE];

    FILE *fp;
    char *source_str;
    size_t source_size;

    fp = fopen("vectors_cl.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char *)malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
                         &device_id, &ret_num_devices);

    // Create an OpenCL context
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                      ARRAY_BYTES, NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                      ARRAY_BYTES, NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
                                      ARRAY_BYTES, NULL, &ret);

    // Copy h_a and h_b to memory buffer
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
                               ARRAY_BYTES, h_a, 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0,
                               ARRAY_BYTES, h_b, 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
        (const char **)&source_str, (const size_t *)&source_size, &ret);
    if (ret != 0) {
        printf("clCreateProgramWithSource returned non-zero status %d\n\n", ret);
        exit(1);
    }

    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    if (ret != 0) {
        printf("clBuildProgram returned non-zero status %d: ", ret);

        if (ret == CL_INVALID_PROGRAM) {
            printf("invalid program\n");
        } else if (ret == CL_INVALID_VALUE) {
            printf("invalid value\n");
        } else if (ret == CL_INVALID_DEVICE) {
            printf("invalid device\n");
        } else if (ret == CL_INVALID_BINARY) {
            printf("invalid binary\n");
        } else if (ret == CL_INVALID_BUILD_OPTIONS) {
            printf("invalid build options\n");
        } else if (ret == CL_INVALID_OPERATION) {
            printf("invalid operation\n");
        } else if (ret == CL_COMPILER_NOT_AVAILABLE) {
            printf("compiler not available\n");
        } else if (ret == CL_BUILD_PROGRAM_FAILURE) {
            printf("build program failure\n");

            // Determine the size of the log
            size_t log_size;
            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

            // Allocate memory for the log
            char *log = (char *) malloc(log_size);

            // Get the log
            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

            // Print the log
            printf("%s\n", log);
        } else if (ret == CL_OUT_OF_HOST_MEMORY) {
            printf("out of host memory\n");
        }
//.........这里部分代码省略.........

开发者ID:Unit03，项目名称:gpgpu_talk，代码行数:101，代码来源:vectors_cl.c

示例8: getAllDevices

/**
 * @brief Creates an array of objects containing the OpenCL variables of each device
 * @param trDataBase The training database which will contain the instances and the features
 * @param selInstances The instances choosen as initial centroids
 * @param transposedTrDataBase The training database already transposed
 * @param conf The structure with all configuration parameters
 * @return A pointer containing the objects
 */
CLDevice *createDevices(const float *const trDataBase, const int *const selInstances, const float *const transposedTrDataBase, Config *const conf) {


	/********** Find the OpenCL devices specified in configuration ***********/

	// OpenCL variables
	cl_uint numPlatformsDevices;
	cl_device_type deviceType;
	cl_program program;
	cl_kernel kernel;
	cl_int status;

	// Others variables
	auto allDevices = getAllDevices();
	CLDevice *devices = new CLDevice[conf -> nDevices + (conf -> ompThreads > 0)];

	for (int dev = 0; dev < conf -> nDevices; ++dev) {

		bool found = false;
		for (int allDev = 0; allDev < allDevices.size() && !found; ++allDev) {

			// Get the specified OpenCL device
			char dbuff[120];
			check(clGetDeviceInfo(allDevices[allDev], CL_DEVICE_NAME, sizeof(dbuff), dbuff, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_NAME);

			// If the device exists...
			if (conf -> devices[dev] == dbuff) {
				devices[dev].device = allDevices[allDev];
				devices[dev].deviceName = dbuff;
				check(clGetDeviceInfo(devices[dev].device, CL_DEVICE_TYPE, sizeof(cl_device_type), &(devices[dev].deviceType), NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_TYPE);


				/********** Device local memory usage ***********/

				long int usedMemory = conf -> nFeatures * sizeof(cl_uchar); // Chromosome of the individual
				usedMemory += conf -> trNInstances * sizeof(cl_uchar); // Mapping buffer
				usedMemory += conf -> K * conf -> nFeatures * sizeof(cl_float); // Centroids buffer
				usedMemory += conf -> trNInstances * sizeof(cl_float); // DistCentroids buffer
				usedMemory += conf -> K * sizeof(cl_int); // Samples_in_k buffer

				// Get the maximum local memory size
				long int maxMemory;
				check(clGetDeviceInfo(devices[dev].device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(long int), &maxMemory, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_MAXMEM);

				// Avoid exceeding the maximum local memory available. 1024 bytes of margin
				check(usedMemory > maxMemory - 1024, "%s:\n\tMax memory: %ld bytes\n\tAllow memory: %ld bytes\n\tUsed memory: %ld bytes\n", CL_ERROR_DEVICE_LOCALMEM, maxMemory, maxMemory - 1024, usedMemory);


				/********** Create context ***********/

				devices[dev].context = clCreateContext(NULL, 1, &(devices[dev].device), 0, 0, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_CONTEXT);


				/********** Create Command queue ***********/

				devices[dev].commandQueue = clCreateCommandQueue(devices[dev].context, devices[dev].device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_DEVICE_QUEUE);


				/********** Create kernel ***********/

				// Open the file containing the kernels
				std::fstream kernels(conf -> kernelsFileName.c_str(), std::fstream::in);
				check(!kernels.is_open(), "%s\n", CL_ERROR_FILE_OPEN);

				// Obtain the size
				kernels.seekg(0, kernels.end);
				size_t fSize = kernels.tellg();
				kernels.seekg(0, kernels.beg);

				char *kernelSource = new char[fSize];
				kernels.read(kernelSource, fSize);
				kernels.close();

				// Create program
				program = clCreateProgramWithSource(devices[dev].context, 1, (const char **) &kernelSource, &fSize, &status);
				check(status != CL_SUCCESS, "%s\n", CL_ERROR_PROGRAM_BUILD);

				// Build program for the device in the context
				char buildOptions[196];
				sprintf(buildOptions, "-I include -D N_INSTANCES=%d -D N_FEATURES=%d -D N_OBJECTIVES=%d -D K=%d -D MAX_ITER_KMEANS=%d", conf -> trNInstances, conf -> nFeatures, conf -> nObjectives, conf -> K, conf -> maxIterKmeans);
				if (clBuildProgram(program, 1, &(devices[dev].device), buildOptions, 0, 0) != CL_SUCCESS) {
					char buffer[4096];
					fprintf(stderr, "Error: Could not build the program\n");
					check(clGetProgramBuildInfo(program, devices[dev].device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL) != CL_SUCCESS, "%s\n", CL_ERROR_PROGRAM_ERRORS);
					check(true, "%s\n", buffer);
				}

				// Create kernel
				const char *kernelName = (devices[dev].deviceType == CL_DEVICE_TYPE_GPU) ? "kmeansGPU" : "";
				devices[dev].kernel = clCreateKernel(program, kernelName, &status);
//.........这里部分代码省略.........

开发者ID:rotty11，项目名称:TIN2015-67020-P，代码行数:101，代码来源:clUtils.cpp

示例9: main

int main(int argc, char** argv)
{
    int err;                            // error code returned from api calls
    
    float data[DATA_SIZE];              // original data set given to device
    float results[DATA_SIZE];           // results returned from device
    unsigned int correct;               // number of correct results returned
    
    size_t global;                      // global domain size for our calculation
    size_t local;                       // local domain size for our calculation
    
    cl_device_id device_id;             // compute device id
    cl_context context;                 // compute context
    cl_command_queue commands;          // compute command queue
    cl_program program;                 // compute program
    cl_kernel kernel;                   // compute kernel
    
    cl_mem input;                       // device memory used for the input array
    cl_mem output;                      // device memory used for the output array
    
    // Fill our data set with random float values
    //
    int i = 0;
    unsigned int count = DATA_SIZE;
    for(i = 0; i < count; i++)
        data[i] = rand() / (float)RAND_MAX;
    
    // Connect to a compute device
    //
    int gpu = 1;
    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to create a device group!\n");
        return EXIT_FAILURE;
    }
    
    // Create a compute context
    //
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n");
        return EXIT_FAILURE;
    }
    
    // Create a command commands
    //
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (!commands)
    {
        printf("Error: Failed to create a command commands!\n");
        return EXIT_FAILURE;
    }
    
    // Create the compute program from the source buffer
    //
    program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err);
    if (!program)
    {
        printf("Error: Failed to create compute program!\n");
        return EXIT_FAILURE;
    }
    
    // Build the program executable
    //
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];
        
        printf("Error: Failed to build program executable!\n");
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        exit(1);
    }
    
    // Create the compute kernel in the program we wish to run
    //
    kernel = clCreateKernel(program, "square", &err);
    if (!kernel || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n");
        exit(1);
    }
    
    // Create the input and output arrays in device memory for our calculation
    //
    input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, NULL);
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
    if (!input || !output)
    {
        printf("Error: Failed to allocate device memory!\n");
        exit(1);
    }
    
    // Write our data set into the input array in device memory
    //
    err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
//.........这里部分代码省略.........

开发者ID:BisariaLove，项目名称:HelloOpenCL，代码行数:101，代码来源:hello_world.c

示例10: btAssert


//.........这里部分代码省略.........

//	char *source = oclLoadProgSource(".//Demos//SpheresGrid//SpheresGrid.cl", "", &program_length);
	//char *source = btOclLoadProgSource(".//Demos//SpheresOpenCL//Shared//SpheresGrid.cl", "", &program_length);

	char *source = btOclLoadProgSource(fileName, "", &program_length);
	if(source == NULL)
	{
		printf("ERROR : OpenCL can't load file %s\n", fileName);
	}
//	oclCHECKERROR (source == NULL, oclFALSE);   
	btAssert(source != NULL);

	// create the program
	printf("OpenCL compiles %s ...", fileName);

#endif //LOAD_FROM_MEMORY


	//printf("%s\n", source);

	m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&source, &program_length, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
#ifndef LOAD_FROM_MEMORY
	free(source);
#endif //LOAD_FROM_MEMORY

	//#define LOCAL_SIZE_LIMIT 1024U
#define LOCAL_SIZE_MAX 1024U

		    // Build the program with 'mad' Optimization option
#ifdef MAC
	const char* flags = "-I. -DLOCAL_SIZE_MAX=1024U -cl-mad-enable -DMAC -DGUID_ARG";
#else
	const char* flags = "-I. -DLOCAL_SIZE_MAX=1024U -DGUID_ARG= ";
#endif
	// build the program
	ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, flags, NULL, NULL);
	if(ciErrNum != CL_SUCCESS)
	{
		// write out standard error
//		oclLog(LOGBOTH | ERRORMSG, (double)ciErrNum, STDERROR);
		// write out the build log and ptx, then exit
		char cBuildLog[10240];
//		char* cPtx;
//		size_t szPtxLength;
		clGetProgramBuildInfo(m_cpProgram, m_cdDevice, CL_PROGRAM_BUILD_LOG, 
							  sizeof(cBuildLog), cBuildLog, NULL );
//		oclGetProgBinary(m_cpProgram, oclGetFirstDev(m_cxMainContext), &cPtx, &szPtxLength);
//		oclLog(LOGBOTH | CLOSELOG, 0.0, "\n\nLog:\n%s\n\n\n\n\nPtx:\n%s\n\n\n", cBuildLog, cPtx);
		printf("\n\n%s\n\n\n", cBuildLog);
		printf("Press ENTER key to terminate the program\n");
		getchar();
		exit(-1); 
	}
	printf("OK\n");

	// create the kernels

	postInitDeviceData();

	initKernel(PARTICLES_KERNEL_COMPUTE_CELL_ID, "kComputeCellId");
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 1, sizeof(cl_mem), (void*) &m_dPos);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 2, sizeof(cl_mem), (void*) &m_dPosHash);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_COMPUTE_CELL_ID].m_kernel, 3, sizeof(cl_mem), (void*) &m_dSimParams);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	initKernel(PARTICLES_KERNEL_INTEGRATE_MOTION, "kIntegrateMotion");
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 1, sizeof(cl_mem), (void *) &m_dPos);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 2, sizeof(cl_mem), (void *) &m_dVel);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 3, sizeof(cl_mem), (void *) &m_dSimParams);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	initKernel(PARTICLES_KERNEL_CLEAR_CELL_START, "kClearCellStart");
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_CLEAR_CELL_START].m_kernel, 0, sizeof(int),		(void *) &m_numGridCells);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_CLEAR_CELL_START].m_kernel, 1, sizeof(cl_mem),	(void*) &m_dCellStart);

	initKernel(PARTICLES_KERNEL_FIND_CELL_START, "kFindCellStart");
//	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 0, sizeof(int),	(void*) &m_numParticles);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 1, sizeof(cl_mem),	(void*) &m_dPosHash);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 2, sizeof(cl_mem),	(void*) &m_dCellStart);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 3, sizeof(cl_mem),	(void*) &m_dPos);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 4, sizeof(cl_mem),	(void*) &m_dVel);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 5, sizeof(cl_mem),	(void*) &m_dSortedPos);
	ciErrNum |= clSetKernelArg(m_kernels[PARTICLES_KERNEL_FIND_CELL_START].m_kernel, 6, sizeof(cl_mem),	(void*) &m_dSortedVel);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	initKernel(PARTICLES_KERNEL_COLLIDE_PARTICLES, "kCollideParticles");
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 1, sizeof(cl_mem),	(void*) &m_dVel);
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 2, sizeof(cl_mem),	(void*) &m_dSortedPos);
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 3, sizeof(cl_mem),	(void*) &m_dSortedVel);
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 4, sizeof(cl_mem),	(void*) &m_dPosHash);
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 5, sizeof(cl_mem),	(void*) &m_dCellStart);
	ciErrNum  = clSetKernelArg(m_kernels[PARTICLES_KERNEL_COLLIDE_PARTICLES].m_kernel, 6, sizeof(cl_mem),	(void*) &m_dSimParams);

	initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL, "kBitonicSortCellIdLocal");
	initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_LOCAL_1, "kBitonicSortCellIdLocal1");
	initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_GLOBAL, "kBitonicSortCellIdMergeGlobal");
	initKernel(PARTICLES_KERNEL_BITONIC_SORT_CELL_ID_MERGE_LOCAL, "kBitonicSortCellIdMergeLocal");
}

开发者ID:AndreAhmed，项目名称:directxgameengine，代码行数:101，代码来源:btParticlesDemoDynamicsWorld.cpp

示例11: main

int main() {
	char buf[]="Hello, World!";
	size_t srcsize, worksize=strlen(buf);
	
	cl_int error;
	cl_platform_id platform;
	cl_device_id device;
	cl_uint platforms, devices;

	// Fetch the Platform and Device IDs; we only want one.
	error=clGetPlatformIDs(1, &platform, &platforms);
	error=clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, &devices);
	cl_context_properties properties[]={
		CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
		0};
	// Note that nVidia's OpenCL requires the platform property
	cl_context context=clCreateContext(properties, 1, &device, NULL, NULL, &error);
	cl_command_queue cq = clCreateCommandQueue(context, device, 0, &error);
	
	rot13(buf);	// scramble using the CPU
	puts(buf);	// Just to demonstrate the plaintext is destroyed

	//char src[8192];
	//FILE *fil=fopen("rot13.cl","r");
	//srcsize=fread(src, sizeof src, 1, fil);
	//fclose(fil);
	
	const char *src=rot13_cl;
	srcsize=strlen(rot13_cl);

	const char *srcptr[]={src};
	// Submit the source code of the rot13 kernel to OpenCL
	cl_program prog=clCreateProgramWithSource(context,
		1, srcptr, &srcsize, &error);
	// and compile it (after this we could extract the compiled version)
	error=clBuildProgram(prog, 0, NULL, "", NULL, NULL);

	// Allocate memory for the kernel to work with
	cl_mem mem1, mem2;
	mem1=clCreateBuffer(context, CL_MEM_READ_ONLY, worksize, NULL, &error);
	mem2=clCreateBuffer(context, CL_MEM_WRITE_ONLY, worksize, NULL, &error);
	
	// get a handle and map parameters for the kernel
	cl_kernel k_rot13=clCreateKernel(prog, "rot13", &error);
	clSetKernelArg(k_rot13, 0, sizeof(mem1), &mem1);
	clSetKernelArg(k_rot13, 1, sizeof(mem2), &mem2);

	// Target buffer just so we show we got the data from OpenCL
	char buf2[sizeof buf];
	buf2[0]='?';
	buf2[worksize]=0;

	// Send input data to OpenCL (async, don't alter the buffer!)
	error=clEnqueueWriteBuffer(cq, mem1, CL_FALSE, 0, worksize, buf, 0, NULL, NULL);
	// Perform the operation
	error=clEnqueueNDRangeKernel(cq, k_rot13, 1, NULL, &worksize, &worksize, 0, NULL, NULL);
	// Read the result back into buf2
	error=clEnqueueReadBuffer(cq, mem2, CL_FALSE, 0, worksize, buf2, 0, NULL, NULL);
	// Await completion of all the above
	error=clFinish(cq);
	
	// Finally, output out happy message.
	puts(buf2);
}

开发者ID:yeerkkiller1，项目名称:Go-AI，代码行数:64，代码来源:hellocl.c

示例12: main

int main()
{
	srand(unsigned(time(nullptr)));
	int err;                            // error code returned from api calls

	cl_device_id device_id;             // compute device id 
	cl_context context;                 // compute context
	cl_command_queue commands;          // compute command queue
	cl_program program;                 // compute program
	cl_kernel kernel;                   // compute kernel

										// OpenCL device memory for matrices
	cl_mem d_A;
	cl_mem d_B;
	cl_mem d_C;

	// set seed for rand()
	srand(2014);

	//Allocate host memory for matrices A and B
	unsigned int size_A = WA * HA;
	unsigned int mem_size_A = sizeof(float) * size_A;
	float* h_A = (float*)malloc(mem_size_A);

	unsigned int size_B = WB * HB;
	unsigned int mem_size_B = sizeof(float) * size_B;
	float* h_B = (float*)malloc(mem_size_B);

	//Initialize host memory
	randomMemInit(h_A, size_A);
	randomMemInit(h_B, size_B);

	//Allocate host memory for the result C
	unsigned int size_C = WC * HC;
	unsigned int mem_size_C = sizeof(float) * size_C;
	float* h_C = (float*)malloc(mem_size_C);

	printf("Initializing OpenCL device...\n");

	cl_uint dev_cnt = 0;
	clGetPlatformIDs(0, 0, &dev_cnt);

	cl_platform_id platform_ids[100];
	clGetPlatformIDs(dev_cnt, platform_ids, NULL);

	// Connect to a compute device
	int gpu = 1;
	err = clGetDeviceIDs(platform_ids[0], gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);
	if (err != CL_SUCCESS){
		printf("Error: Failed to create a device group!\n");
		return EXIT_FAILURE;
	}

	// Create a compute context 
	context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
	if (!context){
		printf("Error: Failed to create a compute context!\n");
		return EXIT_FAILURE;
	}

	// Create a command commands
	commands = clCreateCommandQueue(context, device_id, 0, &err);
	if (!commands){
		printf("Error: Failed to create a command commands!\n");
		return EXIT_FAILURE;
	}

	// Create the compute program from the source file
	char *KernelSource;
	long lFileSize = LoadOpenCLKernel("matrixmul_kernel.cl", &KernelSource);
	if (lFileSize < 0L){
		perror("File read failed");
		return 1;
	}
	//const char* KernelSource = loadKernelCPP(".\\matrixmul_kernel.cl");

	program = clCreateProgramWithSource(context, 1, (const char **)&KernelSource, NULL, &err);
	if (!program){
		printf("Error: Failed to create compute program!\n");
		return EXIT_FAILURE;
	}

	// Build the program executable
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (err != CL_SUCCESS){
		size_t len;
		char buffer[2048];
		printf("Error: Failed to build program executable!\n");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s\n", buffer);
		exit(1);
	}

	// Create the compute kernel in the program we wish to run
	kernel = clCreateKernel(program, "matrixMul", &err);
	if (!kernel || err != CL_SUCCESS){
		printf("Error: Failed to create compute kernel!\n");
		exit(1);
	}

//.........这里部分代码省略.........

开发者ID:RKopczynski，项目名称:ProgramowanieWielowatkowe，代码行数:101，代码来源:main.cpp

示例13: initopencl

void initopencl(void) {
	
	int i;


	// Get Platform and Device Info
	CL_CHECK(clGetPlatformIDs(1, &platform_id, &num_platforms));
	
	// Currently this program only runs on a SINGLE GPU.
	CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_devices));
	printf("=== %d OpenCL platform(s) found: ===\n", num_platforms);
	printf("=== %d OpenCL device(s) found on platform:\n", num_devices);
	
	
	char buffer[10240];
	cl_uint buf_uint;
	cl_ulong buf_ulong;
	printf("  -- %d --\n", i);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
	printf("  DEVICE_NAME = %s\n", buffer);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
	printf("  DEVICE_VENDOR = %s\n", buffer);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
	printf("  DEVICE_VERSION = %s\n", buffer);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
	printf("  DRIVER_VERSION = %s\n", buffer);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
	printf("  DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
	printf("  DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
	CL_CHECK(clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
	printf("  DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);

	if (num_devices == 0)
	{	
		fprintf(stderr, "No Devices found that can run OpenCL.");
		exit(0);	
	}
	// Create OpenCL context
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	if (ret != CL_SUCCESS) {
		
		fprintf(stderr, "Error creating context: Function returned %d \n\n", ret);
		exit(1);
	
	}
	// Create Command Queue
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
	if (ret != CL_SUCCESS) {
		
		fprintf(stderr, "Error creating command Queue: Function returned %d \n\n", ret);
		exit(1);
	
	}
	
	// Load the kernel source code into the array source_str
	FILE *fp;
	char *source_str;
	size_t source_size;
	
	fp = fopen("integrate.cl", "r");
	if (!fp) {
	    fprintf(stderr, "Failed to load kernel.\n");
	    exit(1);
	}
	
	source_str = (char*)malloc(MAX_SOURCE_SIZE);
	source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose( fp );	
	
	
	// Create a program from the kernel source
    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);	
    if (ret != CL_SUCCESS) {	
			fprintf(stderr, "Error creating a program for integration3D. %d \n\n", (int)ret);
			exit(1);
	}
    // Build the program
    
    ret = clBuildProgram(program, 1, &device_id, "-DUSE_DOUBLE=1", NULL, NULL); 
    if (ret != CL_SUCCESS)
    {
    
    	size_t length;
    	char buffer[10240];
    	clGetProgramBuildInfo(program, 1, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length);
    	fprintf(stderr, "Error returned %d. \n\n", (int)ret);
    	printf("Error Log: \n\n %s \n\n", buffer);
    	exit(0);
    }
	
/*    // Create the OpenCL kernel (compute_points_Unstructure3D_1)
    kernel1 = clCreateKernel(program, "compute_points_Unstructure3D_1", &ret);
	if (ret != CL_SUCCESS) {	
			fprintf(stderr, "Error creating a kernel for compute_points_Unstructure3D_1. \n\n");
			exit(1);
	}
*/	
	// Create the OpenCL kernel (check_int)
    kernel2 = clCreateKernel(program, "check_int", &ret);
//.........这里部分代码省略.........

开发者ID:linyufly，项目名称:FlowVC-1，代码行数:101，代码来源:openclfuncs.c

示例14: vadd

int vadd(void) {
    // Create the two input vectors
    int i;
    const int LIST_SIZE = 1024;
    int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
    int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
    for(i = 0; i < LIST_SIZE; i++) {
        A[i] = i;
        B[i] = LIST_SIZE - i;
    }

    // Load the kernel source code into the array source_str
    FILE *fp;
    char *source_str;
    size_t source_size;

    const char *fname = "/home/ckit/program/workspace_java/OpenCLHookSample/jni/vector_add_kernel.cl";
//    const char *fname = "vadd.ir";
    fp = fopen(fname, "rb");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    // Get platform and device information
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;   
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;

      cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
	printf("clGetPlatformIDs err=%d,num_platforms=%d, platform_id=%x\n", ret, ret_num_platforms, (unsigned int)platform_id );

	//#define XXX CL_DEVICE_TYPE_DEFAULT
//	#define XXX CL_DEVICE_TYPE_ALL
//	#define XXX CL_DEVICE_TYPE_GPU
	#define XXX CL_DEVICE_TYPE_CPU

	cl_uint num_platforms = 2;
	cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id)* num_platforms);
	if(NULL == platforms){
		printf("malloc err!\n");
	}
	ret = clGetPlatformIDs(2, platforms, &ret_num_platforms);
	printf("clGetPlatformIDs err=%d,num_platforms=%d, platform_id=%x\n", ret, ret_num_platforms, (unsigned int)platforms[1] );
    ret = clGetDeviceIDs( platforms[0], XXX, 1, &device_id, &ret_num_devices);
    printf("clGetDeviceIDs err=%d,num_platforms=%d, device_id=%x\n", ret, ret_num_platforms, (unsigned int)device_id );
    char name[64];
    ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(char)*64, name, NULL);
    printf("device_name : %s\n", name);

    // Create an OpenCL context
//    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);

    cl_context_properties cps[3] =
    {
    	(cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platforms[0],
        (cl_context_properties)0
    };
    cl_context context = clCreateContextFromType( cps, XXX, NULL, NULL, &ret);
	printf("clCreateContextFromType err=%d,device_type=%x\n", ret, (unsigned int)XXX);

    // Create a command queue
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    // Create memory buffers on the device for each vector 
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);

    // Copy the lists A and B to their respective memory buffers
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(int), B, 0, NULL, NULL);

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
            (const char **)&source_str, (const size_t *)&source_size, &ret);
//    cl_int status;
//    cl_int err;
//    cl_program program = clCreateProgramWithBinary(
//    		context, 1, &device_id, &source_size, (const unsigned char **)&source_str, &status, &err);
    // Build the program
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

    // Set the arguments of the kernel
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
    
//.........这里部分代码省略.........

开发者ID:chikat，项目名称:OpenCLExampleForCPPHook，代码行数:101，代码来源:vadd.cpp

示例15: main

int main( int argc, char* argv[] )
{
    // Length of vectors
    unsigned int n = 100000;
 
    // Host input vectors
    double *h_a;
    double *h_b;
    // Host output vector
    double *h_c;
 
    // Device input buffers
    cl_mem d_a;
    cl_mem d_b;
    // Device output buffer
    cl_mem d_c;
 
    cl_platform_id cpPlatform;        // OpenCL platform
    cl_device_id device_id;           // device ID
    cl_context context;               // context
    cl_command_queue queue;           // command queue
    cl_program program;               // program
    cl_kernel kernel;                 // kernel
 
    // Size, in bytes, of each vector
    size_t bytes = n*sizeof(double);
 
    // Allocate memory for each vector on host
    h_a = (double*)malloc(bytes);
    h_b = (double*)malloc(bytes);
    h_c = (double*)malloc(bytes);
 
    // Initialize vectors on host
    int i;
    for( i = 0; i < n; i++ )
    {
        h_a[i] = sinf(i)*sinf(i);
        h_b[i] = cosf(i)*cosf(i);
    }
 
    size_t globalSize, localSize;
    cl_int err;
 
    // Number of work items in each local work group
    localSize = 64;
 
    // Number of total work items - localSize must be devisor
    globalSize = ceil(n/(float)localSize)*localSize;
 
    // Bind to platform
    err = clGetPlatformIDs(1, &cpPlatform, NULL);
 
    // Get ID for the device
    err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
 
    // Create a context 
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
 
    // Create a command queue
    queue = clCreateCommandQueue(context, device_id, 0, &err);
 
    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1,
                            (const char **) & kernelSource, NULL, &err);
 
    // Build the program executable
    clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
 
    // Create the compute kernel in the program we wish to run
    kernel = clCreateKernel(program, "vecAdd", &err);
 
    // Create the input and output arrays in device memory for our calculation
    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL);
    d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);
 
    // Write our data set into the input array in device memory
    err = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0,
                                   bytes, h_a, 0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0,
                                   bytes, h_b, 0, NULL, NULL);
 
    // Set the arguments to our compute kernel
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
    err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);
 
    // Execute the kernel over the entire range of the data set 
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,
                                                              0, NULL, NULL);
 
    // Wait for the command queue to get serviced before reading back results
    clFinish(queue);
 
    // Read the results from the device
    clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0,
                                bytes, h_c, 0, NULL, NULL );
 
    //Sum up vector c and print result divided by n, this should equal 1 within error
//.........这里部分代码省略.........

开发者ID:GongYiLiao，项目名称:C_Daily，代码行数:101，代码来源:test0.c

注：本文中的clBuildProgram函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。