当前位置: 首页>>代码示例>>C++>>正文


C++ cuMemAlloc函数代码示例

本文整理汇总了C++中cuMemAlloc函数的典型用法代码示例。如果您正苦于以下问题:C++ cuMemAlloc函数的具体用法?C++ cuMemAlloc怎么用?C++ cuMemAlloc使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了cuMemAlloc函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: DeallocateResources

// Allocates the host staging buffers and matching device buffers for a run
// of numb blocks x numt threads.  Any previous allocation is released first.
// Returns true only if every allocation succeeded; on a device-allocation
// failure the corresponding device pointer is zeroed so a later
// DeallocateResources() stays safe.
const bool CUDARunner::AllocateResources(const int numb, const int numt)
{
	bool allocated=true;
	CUresult rval;
	DeallocateResources();

	// Host copies of the kernel input (one struct) and output (one per thread).
	m_in=(cuda_in *)malloc(sizeof(cuda_in));
	m_out=(cuda_out *)malloc(numb*numt*sizeof(cuda_out));
	if(m_in==NULL || m_out==NULL)
	{
		// Previously unchecked: a failed malloc would have crashed later.
		printf("Error allocating host memory\n");
		allocated=false;
	}

	rval=cuMemAlloc(&m_devin,sizeof(cuda_in));
	if(rval!=CUDA_SUCCESS)
	{
		printf("Error %d allocating CUDA memory\n",rval);
		m_devin=0;
		allocated=false;
	}
	rval=cuMemAlloc(&m_devout,numb*numt*sizeof(cuda_out));
	if(rval!=CUDA_SUCCESS)
	{
		printf("Error %d allocating CUDA memory\n",rval);
		m_devout=0;
		allocated=false;
	}

	// Only claim success when everything actually allocated.
	if(allocated)
		printf("Done allocating CUDA resources for (%d,%d)\n",numb,numt);
	return allocated;
}
开发者ID:1989gaurav,项目名称:rpcminer-mod,代码行数:27,代码来源:bitcoinminercuda.cpp

示例2: gpu_transpose_naive

void gpu_transpose_naive(int *dest, const int *src, int height, int width) {
    assert((width & (width - 1)) == 0);  // TODO
    assert((height & (height - 1)) == 0);

    cuda->set_default_module("transpose.ptx");
    CUfunction transpose_kernel = cuda->get_kernel("transpose_naive");

    int grid_dim_x = width / BLOCK_DIM_X;
    int grid_dim_y = height / BLOCK_DIM_Y;

    CUdeviceptr device_src;
    CUdeviceptr device_dest;
    cuMemAlloc(&device_src, width*height*sizeof(int));
    cuMemAlloc(&device_dest, width*height*sizeof(int));
    cuMemcpyHtoD(device_src, src, width*height*sizeof(int));

    void *args[] = {&device_dest, &device_src, &height, &width};
    cuda->launch_kernel_2d_sync(transpose_kernel,
            grid_dim_x, grid_dim_y,
            BLOCK_DIM_X, BLOCK_DIM_Y,
            args);

    cuMemcpyDtoH(dest, device_dest, width*height*sizeof(int));
    cuMemFree(device_src);
    cuMemFree(device_dest);
    cuda->ctx_synchronize();
}
开发者ID:Larhard,项目名称:Objective-Cuda,代码行数:27,代码来源:transpose.cpp

示例3: get_dev_mem

/*
 * get device memory
 */
void
get_dev_mem(void){

  res = cuMemAlloc(&x_dev, N * sizeof(double));
  if(res != CUDA_SUCCESS){
    printf("cuMemAlloc(x) failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuMemAlloc(&v_dev, N * sizeof(double));
  if(res != CUDA_SUCCESS){
    printf("cuMemAlloc(v) failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuMemAlloc(&error_dev, sizeof(int));
  if(res != CUDA_SUCCESS){
    printf("cuMemAlloc(error) failed: res = %s\n", conv(res));
    exit(1);
  }
  
  res = cuMemAlloc(&s_time_dev, sizeof(double));
  if(res != CUDA_SUCCESS){
    printf("cuMemAlloc(s_time) failed: res = %s\n", conv(res));
    exit(1);
  }
  
}
开发者ID:CPFL,项目名称:gtraffic,代码行数:31,代码来源:ov.c

示例4: gpu_transpose_with_shared_mem

void gpu_transpose_with_shared_mem(int *dest, const int *src, int height, int width) {
    assert((width & (width - 1)) == 0);  // TODO
    assert((height & (height - 1)) == 0);

    cuda->set_default_module(CUDA_PTX_PREFIX"transpose.cu.ptx");
    CUfunction transpose_kernel = cuda->get_kernel("transpose_with_shared_mem");

    int grid_dim_x = width / TILE_DIM;
    int grid_dim_y = height / TILE_DIM;

    CUdeviceptr device_src;
    CUdeviceptr device_dest;
    cuMemAlloc(&device_src, width*height*sizeof(int));
    cuMemAlloc(&device_dest, width*height*sizeof(int));
    cuMemcpyHtoD(device_src, src, width*height*sizeof(int));

    void *args[] = {&device_dest, &device_src};
    cuda->launch_kernel_2d_sync(transpose_kernel,
            grid_dim_x, grid_dim_y,
            TILE_DIM, 2,
            args);

    cuMemcpyDtoH(dest, device_dest, width*height*sizeof(int));
    cuMemFree(device_src);
    cuMemFree(device_dest);
    cuda->ctx_synchronize();
}
开发者ID:Larhard,项目名称:Objective-Cuda,代码行数:27,代码来源:transpose.cpp

示例5: cuda_over_map

// Applies the CUDA kernel contained in the module named by argv[argcv[0]]
// element-wise over the given CudaFloatArray arguments, returning a new
// CudaFloatArray of the results.  The effective length is the minimum of
// the argument array sizes.
Object cuda_over_map(Object self, int nparts, int *argcv,
        Object *argv, int flags) {
    CUresult error;
    cuInit(0);
    int deviceCount = 0;
    error = cuDeviceGetCount(&deviceCount);
    // Previously the error code was ignored; treat a query failure the
    // same as having no devices.
    if (error != CUDA_SUCCESS || deviceCount == 0) {
        raiseError("No CUDA devices found");
    }
    CUdevice cuDevice;
    CUcontext cuContext;
    CUmodule cuModule;
    CUfunction cuFunc;
    errcheck(cuDeviceGet(&cuDevice, 0));
    errcheck(cuCtxCreate(&cuContext, 0, cuDevice));
    CUdeviceptr d_res;
    errcheck(cuModuleLoad(&cuModule, grcstring(argv[argcv[0]])));
    CUdeviceptr dps[argcv[0]];
    void *args[argcv[0]+2];

    // Compute the common (minimum) size BEFORE allocating, so every device
    // buffer is sized consistently.  The original code computed the minimum
    // while allocating, over-allocating buffers for earlier arguments.
    int size = INT_MAX;
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        if (a->size < size)
            size = a->size;
    }
    for (int i=0; i<argcv[0]; i++) {
        struct CudaFloatArray *a = (struct CudaFloatArray *)argv[i];
        errcheck(cuMemAlloc(&dps[i], size * sizeof(float)));
        errcheck(cuMemcpyHtoD(dps[i], &a->data, size * sizeof(float)));
        args[i+1] = &dps[i];
    }
    struct CudaFloatArray *r =
        (struct CudaFloatArray *)(alloc_CudaFloatArray(size));
    int fsize = sizeof(float) * size;
    errcheck(cuMemAlloc(&d_res, fsize));
    errcheck(cuMemcpyHtoD(d_res, &r->data, fsize));
    args[0] = &d_res;
    args[argcv[0]+1] = &size;

    int threadsPerBlock = 256;
    int blocksPerGrid = (size + threadsPerBlock - 1) / threadsPerBlock;
    // Derive the kernel entry name "block<basename>" from the module path
    // "_cuda/<basename>.<ext>".
    char name[256];
    strcpy(name, "block");
    strcat(name, grcstring(argv[argcv[0]]) + strlen("_cuda/"));
    for (int i=0; name[i] != 0; i++)
        if (name[i] == '.') {
            name[i] = 0;
            break;
        }
    errcheck(cuModuleGetFunction(&cuFunc, cuModule, name));
    errcheck(cuLaunchKernel(cuFunc, blocksPerGrid, 1, 1,
        threadsPerBlock, 1, 1,
        0,
        NULL, args, NULL));
    errcheck(cuMemcpyDtoH(&r->data, d_res, fsize));
    cuMemFree(d_res);
    for (int i=0; i<argcv[0]; i++)
        cuMemFree(dps[i]);
    // Release the module and context created above (previously leaked on
    // every call).  The result array lives in host memory, so this is safe.
    cuModuleUnload(cuModule);
    cuCtxDestroy(cuContext);
    return (Object)r;
}
开发者ID:mwh,项目名称:grace-cuda,代码行数:59,代码来源:cuda.c

示例6: main

/**
 * Measures kernel-launch overhead on every GPU in the system.
 *
 * A tiny kernel (copies one float in global memory) is launched ITERATIONS
 * times per device; the mean wall time per launch is reported via CUDA
 * events.  Uses the CUDA driver API throughout.
 */
int main() {
  CU_ERROR_CHECK(cuInit(0));

  int deviceCount;
  CU_ERROR_CHECK(cuDeviceGetCount(&deviceCount));

  float hostValue = 5.0f;
  for (int d = 0; d < deviceCount; d++) {
    CUdevice dev;
    CU_ERROR_CHECK(cuDeviceGet(&dev, d));

    CUcontext ctx;
    CU_ERROR_CHECK(cuCtxCreate(&ctx, 0, dev));

    // One input and one output float on the device; seed the input.
    CUdeviceptr devIn, devOut;
    CU_ERROR_CHECK(cuMemAlloc(&devIn, sizeof(float)));
    CU_ERROR_CHECK(cuMemAlloc(&devOut, sizeof(float)));
    CU_ERROR_CHECK(cuMemcpyHtoD(devIn, &hostValue, sizeof(float)));

    CUmodule module;
    CU_ERROR_CHECK(cuModuleLoadData(&module, imageBytes));

    CUfunction kernel;
    CU_ERROR_CHECK(cuModuleGetFunction(&kernel, module, "kernel"));

    void * params[] = { &devIn, &devOut };

    // Time ITERATIONS back-to-back 1x1x1 launches between two events.
    CUevent start, stop;
    CU_ERROR_CHECK(cuEventCreate(&start, 0));
    CU_ERROR_CHECK(cuEventCreate(&stop, 0));

    CU_ERROR_CHECK(cuEventRecord(start, 0));
    for (int i = 0; i < ITERATIONS; i++)
      CU_ERROR_CHECK(cuLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, 0, params, NULL));

    CU_ERROR_CHECK(cuEventRecord(stop, 0));
    CU_ERROR_CHECK(cuEventSynchronize(stop));

    float elapsed;
    CU_ERROR_CHECK(cuEventElapsedTime(&elapsed, start, stop));

    CU_ERROR_CHECK(cuEventDestroy(start));
    CU_ERROR_CHECK(cuEventDestroy(stop));

    CU_ERROR_CHECK(cuMemFree(devIn));
    CU_ERROR_CHECK(cuMemFree(devOut));

    fprintf(stdout, "Device %d: %fms\n", d, (elapsed / (double)ITERATIONS));

    CU_ERROR_CHECK(cuModuleUnload(module));

    CU_ERROR_CHECK(cuCtxDestroy(ctx));
  }

  return 0;
}
开发者ID:garymacindoe,项目名称:cuda-cholesky,代码行数:64,代码来源:kernel-test.c

示例7: TestSAXPY

/*
 * Driver-API SAXPY smoke test: uploads N random floats, runs the "saxpy"
 * kernel from saxpy.ptx (out[i] = alpha * in[i]) and verifies each element
 * on the host.  Returns CUDA_SUCCESS on success, CUDA_ERROR_NOT_FOUND if
 * the module is missing, CUDA_ERROR_UNKNOWN on a result mismatch.
 * NOTE(review): CUDA_CHECK appears to store the call's status and jump to
 * the Error label on failure -- confirm against the macro's definition.
 */
CUresult
TestSAXPY( chCUDADevice *chDevice, size_t N, float alpha )
{
    CUresult status;
    CUdeviceptr dptrOut = 0;
    CUdeviceptr dptrIn = 0;
    float *hostOut = 0;
    float *hostIn = 0;

    CUDA_CHECK( cuCtxPushCurrent( chDevice->context() ) );

    // Device buffers (output zeroed) plus pinned host buffers for the
    // async copies below.
    CUDA_CHECK( cuMemAlloc( &dptrOut, N*sizeof(float) ) );
    CUDA_CHECK( cuMemsetD32( dptrOut, 0, N ) );
    CUDA_CHECK( cuMemAlloc( &dptrIn, N*sizeof(float) ) );
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostOut, N*sizeof(float), 0 ) );
    CUDA_CHECK( cuMemHostAlloc( (void **) &hostIn, N*sizeof(float), 0 ) );
    // Random input in [0, 1].
    for ( size_t i = 0; i < N; i++ ) {
        hostIn[i] = (float) rand() / (float) RAND_MAX;
    }
    CUDA_CHECK( cuMemcpyHtoDAsync( dptrIn, hostIn, N*sizeof(float ), NULL ) );

    {
        CUmodule moduleSAXPY;
        CUfunction kernelSAXPY;
        void *params[] = { &dptrOut, &dptrIn, &N, &alpha };
        
        moduleSAXPY = chDevice->module( "saxpy.ptx" );
        if ( ! moduleSAXPY ) {
            status = CUDA_ERROR_NOT_FOUND;
            goto Error;
        }
        CUDA_CHECK( cuModuleGetFunction( &kernelSAXPY, moduleSAXPY, "saxpy" ) );

        // Fixed 1500x512 launch; the kernel presumably grid-strides over N.
        CUDA_CHECK( cuLaunchKernel( kernelSAXPY, 1500, 1, 1, 512, 1, 1, 0, NULL, params, NULL ) );

    }

    // Synchronize before reading hostOut: the copy above is asynchronous.
    CUDA_CHECK( cuMemcpyDtoHAsync( hostOut, dptrOut, N*sizeof(float), NULL ) );
    CUDA_CHECK( cuCtxSynchronize() );
    for ( size_t i = 0; i < N; i++ ) {
        if ( fabsf( hostOut[i] - alpha*hostIn[i] ) > 1e-5f ) {
            status = CUDA_ERROR_UNKNOWN;
            goto Error;
        }
    }
    status = CUDA_SUCCESS;
    printf( "Well it worked!\n" );

Error:
    // Cleanup runs on both success and failure paths; the cu* free calls
    // tolerate the zero-initialized handles if allocation never happened.
    cuCtxPopCurrent( NULL );
    cuMemFreeHost( hostOut );
    cuMemFreeHost( hostIn );
    cuMemFree( dptrOut );
    cuMemFree( dptrIn );
    return status;
}
开发者ID:AnilVarmaBiruduraju,项目名称:cudahandbook,代码行数:56,代码来源:saxpyDrv.cpp

示例8: try_init

/*
 * Allocates `len` bytes of zero-initialized device memory, tracking the
 * running total in state.bytes_allocated.  Returns NULL for len == 0.
 * On allocation failure the failure is reported and the process aborts.
 */
void *swanMalloc( size_t len ) {
	void *ptr;
	CUdeviceptr dptr;
	CUresult err;
	try_init();
	if( len == 0 ) {
//		printf("SWAN: WARNING - swnaMalloc() called with 0\n");
		return NULL;
	}

	err = cuMemAlloc( &dptr, len );

	ptr = (void*)dptr;

	if ( err != CUDA_SUCCESS ) {
		printf("Attempted to allocate %lu bytes (%lu already allocated)\n", len, state.bytes_allocated );
		/* Report through the library's error channel first; the original
		 * called abort() before error(), making the report unreachable. */
		error("swanMalloc failed\n" );
		abort();
	}
	state.bytes_allocated += len;


	// MJH likes his memory clean
	swanMemset( ptr, 0, len );
	return ptr;
}
开发者ID:liyuming1978,项目名称:swan,代码行数:26,代码来源:swanlib_nv.c

示例9: halide_dev_malloc

// Ensures `buf` has a device allocation: computes the required size from
// the buffer's extents/strides and attaches a freshly cuMemAlloc'd pointer.
// A buffer that already carries a device allocation is left untouched.
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    // Already backed by device memory -- nothing to do.
    if (buf->dev) {
        return;
    }

    size_t size = __buf_size(user_context, buf);

    #ifdef DEBUG
    halide_printf(user_context, "dev_malloc allocating buffer of %zd bytes, "
                  "extents: %zdx%zdx%zdx%zd strides: %zdx%zdx%zdx%zd (%d bytes per element)\n",
                  size, buf->extent[0], buf->extent[1], buf->extent[2], buf->extent[3],
                  buf->stride[0], buf->stride[1], buf->stride[2], buf->stride[3],
                  buf->elem_size);
    #endif

    // Allocate on the device (timed) and record the handle in the buffer.
    CUdeviceptr dev_ptr;
    TIME_CALL( cuMemAlloc(&dev_ptr, size), "dev_malloc");
    buf->dev = (uint64_t)dev_ptr;
    halide_assert(user_context, buf->dev);

    #ifdef DEBUG
    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    #endif
}
开发者ID:EEmmanuel7,项目名称:Halide,代码行数:26,代码来源:cuda.cpp

示例10: get_read_ptr_cuda

    // Returns a device pointer for device `devid` whose first
    // read_byte_size bytes mirror the host buffer, allocating the device
    // buffer (byte_size bytes) and uploading from the host copy on demand.
    CUdeviceptr get_read_ptr_cuda(ComputeEnv *env,int devid, size_t read_byte_size) {
        // Fast path: device copy already valid.
        if (cuda_valid_list[devid]) {
            return cuda_ptr_list[devid];
        }

        // No host data to upload from -- the buffer was never populated.
        if (!host_valid) {
            abort();
            return 0;
        }

        CUDADev *cuda_dev = &env->cuda_dev_list[devid];
        cuCtxPushCurrent(cuda_dev->context);

        // Lazily allocate the device buffer on first use.
        if (cuda_ptr_list[devid] == 0) {
            CUresult rv = cuMemAlloc(&cuda_ptr_list[devid], byte_size);
            if (rv != CUDA_SUCCESS) {
                abort();
            }
        }

        // (transfer-timing instrumentation was here; left disabled as in
        // the original: t0/t1 around the copy, stored in env->transfer_wait)
        cuMemcpyHtoD(cuda_ptr_list[devid], host_ptr, read_byte_size);

        cuda_valid_list[devid] = true;

        CUcontext prev;
        cuCtxPopCurrent(&prev);

        return cuda_ptr_list[devid];
    }
开发者ID:vitaliylag,项目名称:waifu2x-converter-cpp,代码行数:34,代码来源:Buffer.hpp

示例11: main

int main(){
	init_test();
	const std::string source = 
	".version 4.2\n"
	".target sm_20\n"
	".address_size 64\n"
	".visible .entry kernel(.param .u64 kernel_param_0) {\n"
	".reg .s32 	%r<2>;\n"
	".reg .s64 	%rd<3>;\n"
	"bra 	BB1_2;\n"
	"ld.param.u64 	%rd1, [kernel_param_0];\n"
	"cvta.to.global.u64 	%rd2, %rd1;\n"
	"mov.u32 	%r1, 5;\n"
	"st.global.u32 	[%rd2], %r1;\n"
	"BB1_2: ret;\n"
	"}\n";
	CUmodule modId = 0;
	CUfunction funcHandle = 0;
	cu_assert(cuModuleLoadData(&modId, source.c_str()));
	cu_assert(cuModuleGetFunction(&funcHandle, modId, "kernel"));
	CUdeviceptr devValue;
	int hostValue = 10;
	cu_assert(cuMemAlloc(&devValue, sizeof(int)));
	cu_assert(cuMemcpyHtoD(devValue, &hostValue, sizeof(hostValue)));
	void * params[] = {&devValue};
	cu_assert(cuLaunchKernel(funcHandle, 1,1,1, 1,1,1, 0,0, params, nullptr));
	cu_assert(cuMemcpyDtoH(&hostValue, devValue, sizeof(hostValue)));
	assert(hostValue == 10);
	std::cout << hostValue << "\n";
	cu_assert(cuMemFree(devValue));
	cu_assert(cuModuleUnload(modId));
	return 0;
}
开发者ID:sebsgit,项目名称:gemu,代码行数:33,代码来源:basic_branch.cpp

示例12: sararfftnd_one_complex_to_real

/*
 * Runs the complex-to-real FFT described by `plan` on h_data in place:
 * the host data is copied to a scratch device buffer, transformed there
 * with cufftExecC2R, and copied back.  Exits with a distinct status code
 * (90-94) on each possible CUDA/CUFFT failure.
 */
void sararfftnd_one_complex_to_real(
  sararfftnd_plan plan, sarafft_complex *h_data
) {
  CUdeviceptr d_data;
  size_t planSize = getPlanSize( plan );
  if ( CUDA_SUCCESS != cuMemAlloc( &d_data, planSize ) ) {
    printf( "cuMemAlloc failed for plansize %li!\n", planSize );
    fflush ( stdout );
    exit( 90 );
  }
  if ( CUDA_SUCCESS != cuMemcpyHtoD( d_data, h_data, planSize ) ) {
    printf( "cuMemcpyHtoD failed!\n" );
    fflush ( stdout );
    exit( 91 );
  }
  /* Fixed: the message previously blamed cufftExecR2C for a C2R failure. */
  if ( CUFFT_SUCCESS != cufftExecC2R( plan, ( cufftComplex* )d_data, ( cufftReal* )d_data ) ) {
    printf( "cufftExecC2R failed!\n" );
    fflush ( stdout );
    exit( 92 );
  }
  if ( CUDA_SUCCESS != cuMemcpyDtoH( h_data, d_data, planSize ) ) {
    printf( "cuMemcpyDtoH failed!\n" );
    fflush ( stdout );
    exit( 93 );
  }
  if ( CUDA_SUCCESS != cuMemFree( d_data ) ) {
    printf( "cuMemFree failed!\n" );
    fflush ( stdout );
    exit( 94 );
  }
}
开发者ID:sara-nl,项目名称:Omnimatch,代码行数:31,代码来源:saracufft.cpp

示例13: prealloc

    // Ensures this buffer has a host-side backing store and, for the
    // currently selected target processor, a device-side allocation
    // (an OpenCL buffer or a CUDA device pointer).  Returns false if any
    // allocation fails; partially-created OpenCL buffers are released.
    bool prealloc(ComputeEnv *env) {
        int devid;
        // Host backing store is always required, 64-byte aligned.
        if (host_ptr == nullptr) {
            host_ptr = _mm_malloc(byte_size, 64);
            if (host_ptr == nullptr) {
                return false;
            }
        }

        switch (env->target_processor.type) {
        case W2XCONV_PROC_HOST:
            // Host-only processing needs no device allocation.
            break;

        case W2XCONV_PROC_OPENCL:
            devid = env->target_processor.devid;
            if (cl_ptr_list[devid] == nullptr) {
                cl_int err;
                OpenCLDev *dev = &env->cl_dev_list[devid];
                cl_ptr_list[devid] = clCreateBuffer(dev->context,
                                                    CL_MEM_READ_WRITE,
                                                    byte_size, nullptr, &err);
                if (cl_ptr_list[devid] == nullptr) {
                    return false;
                }

                /* touch memory to force allocation */
                // (clCreateBuffer may defer the real allocation; a 1-byte
                // blocking write surfaces out-of-memory errors here.)
                char data = 0;
                err = clEnqueueWriteBuffer(dev->queue, cl_ptr_list[devid],
                                           CL_TRUE, 0, 1, &data, 0, nullptr, nullptr);
                if (err != CL_SUCCESS) {
                    clReleaseMemObject(cl_ptr_list[devid]);
                    cl_ptr_list[devid] = nullptr;
                    return false;
                }

            }
            break;

        case W2XCONV_PROC_CUDA:
            devid = env->target_processor.devid;

            if (cuda_ptr_list[devid] == 0) {
                CUresult err;
                CUDADev *dev = &env->cuda_dev_list[devid];
                // The device's context must be current for cuMemAlloc;
                // it is restored before the result is checked.
                cuCtxPushCurrent(dev->context);
                err = cuMemAlloc(&cuda_ptr_list[devid], byte_size);
                CUcontext old;
                cuCtxPopCurrent(&old);

                if (err != CUDA_SUCCESS) {
                    return false;
                }
            }
            break;

        }

        return true;
    }
开发者ID:vitaliylag,项目名称:waifu2x-converter-cpp,代码行数:59,代码来源:Buffer.hpp

示例14: mem_alloc

	// Allocates device memory sized for `mem` and stores the resulting
	// handle in mem.device_pointer.  The CUDA context is made current for
	// the duration of the allocation.
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		cuda_push_context();
		CUdeviceptr dptr;
		cuda_assert(cuMemAlloc(&dptr, mem.memory_size()))
		mem.device_pointer = (device_ptr)dptr;
		cuda_pop_context();
	}
开发者ID:vanangamudi,项目名称:blender-main,代码行数:8,代码来源:device_cuda.cpp

示例15: mem

// Allocates `size` bytes of device memory in this context and hands it
// back through *ppMem wrapped in a DeviceMemPtr.  Returns CUDA_SUCCESS on
// success; HANDLE_RESULT short-circuits on allocation failure.
CUresult CuContext::ByteAlloc(size_t size, DeviceMemPtr* ppMem) {
	DeviceMemPtr allocation(new CuDeviceMem);
	CUresult result = cuMemAlloc(&allocation->_deviceptr, size);
	HANDLE_RESULT();

	// Record ownership metadata, then transfer the wrapper to the caller.
	allocation->_size = size;
	allocation->_context = this;
	ppMem->swap(allocation);
	return CUDA_SUCCESS;
}
开发者ID:CindyYang85,项目名称:mgpu,代码行数:10,代码来源:cucpp.cpp


注:本文中的cuMemAlloc函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。