当前位置: 首页>>代码示例>>C++>>正文


C++ cutilSafeCall函数代码示例

本文整理汇总了C++中cutilSafeCall函数的典型用法代码示例。如果您正苦于以下问题：C++ cutilSafeCall函数的具体用法？C++ cutilSafeCall怎么用？C++ cutilSafeCall使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了cutilSafeCall函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: AllocateResources

const unsigned long CUDARunner::RunStep()
{
	unsigned int best=0;
	unsigned int bestg=~0;

	if(m_in==0 || m_out==0 || m_devin==0 || m_devout==0)
	{
		AllocateResources(m_numb,m_numt);
	}

	cutilSafeCall(cudaMemcpy(m_devin,m_in,sizeof(cuda_in),cudaMemcpyHostToDevice));

	cuda_process_helper(m_devin,m_devout,GetStepIterations(),GetStepBitShift(),m_numb,m_numt);

	cutilSafeCall(cudaMemcpy(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out),cudaMemcpyDeviceToHost));

	for(int i=0; i<m_numb*m_numt; i++)
	{
		if(m_out[i].m_bestnonce!=0 && m_out[i].m_bestg<bestg)
		{
			best=m_out[i].m_bestnonce;
			bestg=m_out[i].m_bestg;
		}
	}

	return CryptoPP::ByteReverse(best);

}
开发者ID:chancn,项目名称:bitcoin-pool,代码行数:28,代码来源:bitcoinminercuda.cpp

示例2: benchmark

void
benchmark(int iterations) 
{
    // allocate memory for result
    unsigned int *d_result;
    unsigned int size = width * height * sizeof(unsigned int);
    cutilSafeCall( cudaMalloc( (void**) &d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);

    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckError( cutStartTimer( timer));

    // execute the kernel
    for(int i=0; i<iterations; i++) {
        gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    }

    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckError( cutStopTimer( timer));

    // check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed");

    printf("Processing time: %f (ms)\n", cutGetTimerValue( timer));
    printf("%.2f Mpixels/sec\n", (width*height*iterations / (cutGetTimerValue( timer) / 1000.0f)) / 1e6);

    cutilSafeCall(cudaFree(d_result));
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:30,代码来源:recursiveGaussian-host.cpp

示例3: rayIntervalSplatting

void CUDARayCastSDF::render(const HashData& hashData, const HashParams& hashParams, const DepthCameraData& cameraData, const mat4f& lastRigidTransform)
{
	rayIntervalSplatting(hashData, hashParams, cameraData, lastRigidTransform);
	m_data.d_rayIntervalSplatMinArray = m_rayIntervalSplatting.mapMinToCuda();
	m_data.d_rayIntervalSplatMaxArray = m_rayIntervalSplatting.mapMaxToCuda();

	// Start query for timing
	if(GlobalAppState::getInstance().s_timingsDetailledEnabled)
	{
		cutilSafeCall(cudaDeviceSynchronize()); 
		m_timer.start();
	}

	renderCS(hashData, m_data, cameraData, m_params);

	//convertToCameraSpace(cameraData);
	if (!m_params.m_useGradients)
	{
		computeNormals(m_data.d_normals, m_data.d_depth4, m_params.m_width, m_params.m_height);
	}

	m_rayIntervalSplatting.unmapCuda();

	// Wait for query
	if(GlobalAppState::getInstance().s_timingsDetailledEnabled)
	{
		cutilSafeCall(cudaDeviceSynchronize()); 
		m_timer.stop();
		TimingLog::totalTimeRayCast+=m_timer.getElapsedTimeMS();
		TimingLog::countTimeRayCast++;
	}
}
开发者ID:ZaneYang,项目名称:VoxelHashing,代码行数:32,代码来源:CUDARayCastSDF.cpp

示例4: linearbackwardeuler_init

__HOST__
int linearbackwardeuler_init(solver_props *props){
  solver_mem *mem;
  linearbackwardeuler_opts *opts = (linearbackwardeuler_opts*)&props->opts;
  unsigned int bandwidth = opts->upperhalfbw + opts->lowerhalfbw + 1;

#if defined TARGET_GPU
  // Allocates GPU global memory for solver's persistent data
  switch(opts->lsolver){
  case LSOLVER_DENSE:
    cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT)));
    break;
  case LSOLVER_BANDED:
    cutilSafeCall(cudaMalloc((void **)&mem, PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT)));
    break;
  default:
    return 1;
  }
#else // CPU and OPENMP targets
  switch(opts->lsolver){
  case LSOLVER_DENSE:
    mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * props->statesize * sizeof(CDATAFORMAT));
    break;
  case LSOLVER_BANDED:
    mem = (solver_mem *)malloc(PARALLEL_MODELS * props->statesize * bandwidth * sizeof(CDATAFORMAT));
    break;
  default:
    return 1;
  }
#endif

  props->mem = mem; /* The matrix */

  return 0;
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:35,代码来源:linearbackwardeuler.c

示例5: SOLVER

void SOLVER(bogacki_shampine, free, TARGET, SIMENGINE_STORAGE, bogacki_shampine_mem *mem) {
#if defined TARGET_GPU
  bogacki_shampine_mem tmem;

  cutilSafeCall(cudaMemcpy(&tmem, mem, sizeof(bogacki_shampine_mem), cudaMemcpyDeviceToHost));

  GPU_ENTRY(free_props, SIMENGINE_STORAGE, tmem.props);

  cutilSafeCall(cudaFree(tmem.k1));
  cutilSafeCall(cudaFree(tmem.k2));
  cutilSafeCall(cudaFree(tmem.k3));
  cutilSafeCall(cudaFree(tmem.k4));
  cutilSafeCall(cudaFree(tmem.temp));
  cutilSafeCall(cudaFree(tmem.next_states));
  cutilSafeCall(cudaFree(tmem.z_next_states));
  cutilSafeCall(cudaFree(tmem.cur_timestep));
  cutilSafeCall(cudaFree(mem));

  GPU_ENTRY(exit, SIMENGINE_STORAGE);

#else // Used for CPU and OPENMP targets

  free(mem->k1);
  free(mem->k2);
  free(mem->k3);
  free(mem->k4);
  free(mem->temp);
  free(mem->next_states);
  free(mem->z_next_states);
  free(mem->cur_timestep);
  free(mem);
#endif
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:33,代码来源:bogacki_shampine.c

示例6: checkResultCuda

////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char** argv, const GLuint& vbo)
{
    cutilSafeCall(cudaGLUnregisterBufferObject(vbo));

    // map buffer object
    glBindBuffer(GL_ARRAY_BUFFER_ARB, vbo );
    float* data = (float*) glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);

    // check result
    if(cutCheckCmdLineFlag(argc, (const char**) argv, "regression")) {
        // write file for regression test
        cutilCheckError(cutWriteFilef("./data/regression.dat",
            data, mesh_width * mesh_height * 3, 0.0));
    }

    // unmap GL buffer object
    if(! glUnmapBuffer(GL_ARRAY_BUFFER)) {
        fprintf(stderr, "Unmap buffer failed.\n");
        fflush(stderr);
    }

    cutilSafeCall(cudaGLRegisterBufferObject(vbo));

    CUT_CHECK_ERROR_GL();
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:29,代码来源:simpleGL.cpp

示例7: test

bool test(int (*setup)(float ** dev_ptr_ptr, unsigned int * w_ptr, unsigned int * h_ptr)) {
	float * dev;
	unsigned int width;
	unsigned int height;
	setup(&dev, &width, &height);
	float * val = sat_scan_gold<float>(dev,width, height);
	cutilSafeCall(cudaFree(dev));
	setup(&dev, &width, &height);
	float * yours = sat_scan<float, kind>(dev,width, height);
	cutilSafeCall(cudaFree(dev));
	float EPSILON = 0.1f;

	bool isItGood = true;
	/*
	for (unsigned int i = 0; i < len; i++) {
	printf("%f, ", val[i]);
	}
	printf("\n");
	for (unsigned int i = 0; i < len; i++) {
	printf("%f, ", yours[i]);
	}
	printf("\n");
	*/
	for (unsigned int i = 0; i < width * height; i++) {
		if (!(abs(val[i] - yours[i]) < EPSILON)) {
			printf("V: %f Y: %f at %d\n", val[i], yours[i], i);
			isItGood = false;
			break;
		}
	}

	free(val);
	free(yours);
	return isItGood;
}
开发者ID:eclipse0922,项目名称:cbench-cis565s11,代码行数:35,代码来源:sat_scan.cpp

示例8: GPU_ENTRY

forwardeuler_mem *SOLVER(forwardeuler, init, TARGET, SIMENGINE_STORAGE, solver_props *props) {
#if defined TARGET_GPU
  GPU_ENTRY(init, SIMENGINE_STORAGE);

  // Temporary CPU copies of GPU datastructures
  forwardeuler_mem tmem;
  // GPU datastructures
  forwardeuler_mem *dmem;
  
  // Allocate GPU space for mem and pointer fields of mem (other than props)
  cutilSafeCall(cudaMalloc((void**)&dmem, sizeof(forwardeuler_mem)));
  tmem.props = GPU_ENTRY(init_props, SIMENGINE_STORAGE, props);;
  cutilSafeCall(cudaMalloc((void**)&tmem.k1, props->statesize*props->num_models*sizeof(CDATAFORMAT)));

  // Copy mem structure to GPU
  cutilSafeCall(cudaMemcpy(dmem, &tmem, sizeof(forwardeuler_mem), cudaMemcpyHostToDevice));

  return dmem;
  
#else // Used for CPU and OPENMP targets

  forwardeuler_mem *mem = (forwardeuler_mem*)malloc(sizeof(forwardeuler_mem));

  mem->props = props;
  mem->k1 = (CDATAFORMAT*)malloc(props->statesize*props->num_models*sizeof(CDATAFORMAT));

  return mem;

#endif // defined TARGET_GPU
}
开发者ID:joshuaecook,项目名称:simengine,代码行数:30,代码来源:forwardeuler.c

示例9: load_tables_as_textures

void CUDAMarcher::PrepareTerrain()
{
	mCudaEdgeTable = 0;
	mCudaTriTable = 0;
	mCudaVertTable = 0;
	mCudaPerlinDst1 = 0;
	mCudaPerlinDst2 = 0;
	mCudaPerlinDst3 = 0;

	//-- Load tables
	load_tables_as_textures( &mCudaEdgeTable, &mCudaTriTable, &mCudaVertTable );

	//-- Create and load perlin data
	host_PerlinInitialize(0);
	host_InitPerlinData(PERLIN_DATA_RANK, PERLIN_DATA_SIZE);

	unsigned int bufsize = sizeof(uint) * MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE*MARCHING_BLOCK_SIZE;
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVerts, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelVertsScan, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupied, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_voxelOccupiedScan, bufsize));
	cutilSafeCall(cudaMalloc((void**) &CUDABlock::cuda_compVoxelArray, bufsize));

	//Init blocks
	Init(4, 3, 4);
}
开发者ID:zeroZshadow,项目名称:agt6-cuda,代码行数:26,代码来源:CUDAMarcher.cpp

示例10: allocCuda0

/**
 * Allocates a vector in GPU and initializes it to zero.
 *
 * @param size length of the vector to be allocated.
 * @return the pointer to the GPU allocated memory.
 */
void* allocCuda0(int size) {
    void* out;
    cutilSafeCall( cudaMalloc((void**) &out, size));
    cutilSafeCall( cudaMemset(out, 0, size));
	if (DEBUG) printf("allocCuda(%d): %p\n", size, out);
    return out;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:13,代码来源:cuda_util.cpp

示例11: raytrace

void raytrace()
{
	uint* imagedata;
	cutilSafeCall(cudaGLMapBufferObject((void**)&imagedata, pbo));

	float3 A, B, C;
	camera.getImagePlane(A, B, C);
	dev_camera d_cam(camera.getPosition(), A, B, C, aperture, focal);
	dev_light d_light(light.getPosition(), light.getColor(), 4096);
	//need to change here.
	float3 minAABB, maxAABB;
	world.getAABB(minAABB, maxAABB);
	sceneInfo scene(world.getNumTriangles(), world.getNumSpheres(), world.getNumBoxes(), minAABB, maxAABB);
	//TODO: add control for clear buffer here.
	//change here for the many object case
	raytraceImage(imagedata, dev_lastframe_ptr, dev_num_layers, r_width, r_height, moved, d_cam, d_light, scene);
	//for showing the real frame rate
	cudaMemcpy(&frame_num, dev_num_layers, sizeof(float), cudaMemcpyDeviceToHost);
	frame_num++;
	cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
	cutilSafeCall(cudaGLUnmapBufferObject(pbo));

	//download texture from pbo
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
	glBindTexture(GL_TEXTURE_2D, framebuffer);
	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, r_width, r_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

	CUT_CHECK_ERROR_GL();
}
开发者ID:duxing,项目名称:GPUFinal,代码行数:30,代码来源:main.cpp

示例12: allocCudaSeq

/**
 * Allocates and copies sequence data in GPU.
 *
 * @param data the sequence data string.
 * @param len the length of the sequence.
 * @param padding_len extra padding length.
 * @param padding_char character to be used as padding.
 *
 * @return the pointer to the GPU memory allocated for the sequence.
 */
unsigned char* allocCudaSeq(const char* data, const int len, const int padding_len, const char padding_char) {
	unsigned char* out = (unsigned char*)allocCuda0(len+padding_len);
	if (DEBUG) printf("allocCudaSeq(%p, %d, %d, %d): %p\n", data, len, padding_len, padding_char, out);
	cutilSafeCall( cudaMemcpy(out, data, len, cudaMemcpyHostToDevice));
	cutilSafeCall( cudaMemset(out+len, padding_char, padding_len) );
    return out;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:17,代码来源:cuda_util.cpp

示例13: cleanup

void cleanup()
{
    cutilCheckError( cutDeleteTimer( timer));
    if(h_img)cutFree(h_img);
    cutilSafeCall(cudaFree(d_img));
    cutilSafeCall(cudaFree(d_temp));

    // Refer to boxFilter_kernel.cu for implementation
    freeTextures();

    //DEPRECATED: cutilSafeCall(cudaGLUnregisterBufferObject(pbo));
    cudaGraphicsUnregisterResource(cuda_pbo_resource);

    glDeleteBuffersARB(1, &pbo);
    glDeleteTextures(1, &texid);
    glDeleteProgramsARB(1, &shader);

    if (g_CheckRender) {
        delete g_CheckRender;
        g_CheckRender = NULL;
    }
    if (g_FrameBufferObject) {
        delete g_FrameBufferObject;
        g_FrameBufferObject = NULL;
    }
}
开发者ID:yyzreal,项目名称:gpuocelot,代码行数:26,代码来源:boxFilter.cpp

示例14: uploadToGPU

void uploadToGPU()
{//upload scene to GPU
	std::vector<float4> triangles;
	std::vector<float4> spheres;
	std::vector<float4> boxes;
	world.updateToArray(triangles, spheres, boxes);

	size_t triangle_size = triangles.size() * sizeof(float4);
	size_t sphere_size = spheres.size() * sizeof(float4);
	size_t boxes_size = boxes.size() * sizeof(float4);

	//merge into one scene
	std::vector<float4> sceneObj;
	sceneObj.insert(sceneObj.end(), triangles.begin(), triangles.end());
	sceneObj.insert(sceneObj.end(), spheres.begin(), spheres.end());
	sceneObj.insert(sceneObj.end(), boxes.begin(), boxes.end());

	//if the scene is dynamic. this function might has to be updated per frame.
	//in that case should avoid mallocing every frame.
	cutilSafeCall(cudaMalloc((void**)&dev_scene_pointer, triangle_size + sphere_size + boxes_size));
	cudaMemcpy(dev_scene_pointer, &sceneObj[0], triangle_size + sphere_size + boxes_size, cudaMemcpyHostToDevice);//&triangles[0]
	bindTexture(dev_scene_pointer, triangles.size()/4, spheres.size()/3, boxes.size()/4);//change the denominator for more information to bind 

	//add a device framebuffer for last frame.
	std::vector<float3> clean(r_width * r_height, make_float3(0.0));
	//float zero = 0;
	cutilSafeCall(cudaMalloc((void**)&dev_lastframe_ptr, r_width * r_height * sizeof(float3)));
	cudaMemcpy(dev_lastframe_ptr, &clean[0], r_width * r_height * sizeof(float3), cudaMemcpyHostToDevice);
	cutilSafeCall(cudaMalloc((void**)&dev_num_layers, sizeof(int)));
	cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice);
}
开发者ID:duxing,项目名称:GPUFinal,代码行数:31,代码来源:main.cpp

示例15: runBenchmark

void runBenchmark(int iterations)
{
    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));  
    cudaEventSynchronize(stopEvent);

    float milliseconds = 0;
    cutilSafeCall( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
    double interactionsPerSecond = 0;
    double gflops = 0;
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);
    
    printf("%d bodies, total time for %d iterations: %0.3f ms\n", 
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, 20);   
}
开发者ID:AnkurAnandapu,项目名称:ocelot-fork,代码行数:26,代码来源:nbody.cpp


注:本文中的cutilSafeCall函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。