

C++ cublasCreate Function Code Examples

This article collects typical usage examples of the C++ cublasCreate function from open-source projects. If you have been wondering what cublasCreate does, how to call it, or what real-world usage looks like, the hand-picked examples below should help.


The following sections present 15 code examples of cublasCreate, sorted by popularity by default.
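Before turning to the project code, here is a minimal, self-contained sketch of the usual cublasCreate / cublasDestroy lifecycle. It is not taken from any of the projects below; it assumes a CUDA-capable device and linking against cuBLAS, and it scales a small vector with cublasSscal purely as a sanity check.

// Minimal sketch (not from the projects below): create a cuBLAS handle,
// check the returned status, use the handle once, then destroy it.
#include <cstdio>
#include <cuda_runtime.h>
#include <cublas_v2.h>

int main() {
  cublasHandle_t handle;
  cublasStatus_t status = cublasCreate(&handle);
  if (status != CUBLAS_STATUS_SUCCESS) {
    std::fprintf(stderr, "cublasCreate failed with status %d\n", (int)status);
    return 1;
  }

  // Sanity check: scale a small device vector in place with cublasSscal.
  const int n = 4;
  float host[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float* dev = NULL;
  cudaMalloc((void**)&dev, n * sizeof(float));
  cudaMemcpy(dev, host, n * sizeof(float), cudaMemcpyHostToDevice);

  const float alpha = 2.0f;
  cublasSscal(handle, n, &alpha, dev, 1);

  cudaMemcpy(host, dev, n * sizeof(float), cudaMemcpyDeviceToHost);
  std::printf("%.1f %.1f %.1f %.1f\n", host[0], host[1], host[2], host[3]);

  cudaFree(dev);
  cublasDestroy(handle);  // release the handle when the work is done
  return 0;
}

Compile with nvcc and link against cuBLAS (e.g. nvcc -lcublas; the file name is up to you). The examples below all revolve around this same handle-creation step, differing mainly in how they check the returned cublasStatus_t and where they store the handle.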

Example 1: cublas_handle_

Caffe::Caffe()
    : cublas_handle_(NULL), cusparse_handle_(NULL), cusparse_descr_(NULL),
      curand_generator_(NULL), random_generator_(), mode_(Caffe::CPU),
      solver_count_(1), root_solver_(true) {
  // Try to create a cublas handler, and report an error if failed (but we will
  // keep the program running as one might just want to run CPU code).
  LOG(INFO) << "caffe init.";
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
  // add cusparse handler
  if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
    LOG(ERROR) << "cannot create Cusparse handle,Cusparse won't be available.";
  }
  if (cusparseCreateMatDescr(&cusparse_descr_) != CUSPARSE_STATUS_SUCCESS) {
    LOG(ERROR) << "cannot create Cusparse descr,descr won't be available.";
  } else {
    cusparseSetMatType(cusparse_descr_, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(cusparse_descr_, CUSPARSE_INDEX_BASE_ZERO);
    LOG(INFO) << "init descr";
  }
  // Try to create a curand handler.
  if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
      != CURAND_STATUS_SUCCESS ||
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
      != CURAND_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
  LOG(INFO)<<"caffe finish";
}
Developer: ZhouYuSong, Project: caffe-pruned, Lines: 28, Source: common.cpp

Example 2: SwitchDevice

void Context::Init(int device_id) {
  device_id_ = device_id;

  SwitchDevice();

#if defined(USE_CUDA)
  if (blas_handle_ == nullptr) {
    CUBLAS_CHECK(cublasCreate((cublasHandle_t*)&blas_handle_));
    CHECK_NOTNULL(blas_handle_);
  }
#endif

#if defined(USE_CUDNN)
  if (cudnn_handle_ == nullptr) {
    CUDNN_CHECK(cudnnCreate((cudnnHandle_t*)&cudnn_handle_));
    CHECK_NOTNULL(cudnn_handle_);
  }
#endif

#if defined(USE_NNPACK)
  if (nnpack_handle_ == nullptr) {
    CHECK_EQ(nnp_initialize(), nnp_status_success);
    nnpack_handle_ = pthreadpool_create(0);
    CHECK_NOTNULL(nnpack_handle_);
  }
#endif
}
Developer: junluan, Project: shadow, Lines: 27, Source: context.cpp

Example 3: gpuSetUp

void gpuSetUp(const int maxBlocksPerKernel, const int n) {
  debug("setting up cuBLAS");
  if (cublasCreate(&g_cublasHandle) != CUBLAS_STATUS_SUCCESS) {
    fatal("couldn't open cuBLAS handle");
  }
  cuSetUp(maxBlocksPerKernel, n);
}
Developer: zauberkraut, Project: acmi, Lines: 7, Source: linalg.c

Example 4: micronn_read

micronn* micronn_read(FILE* file)
{
    uint i, tmp;
    cublasStatus_t stat;

    micronn* net = malloc(sizeof(micronn));

    stat = cublasCreate(&net->handle);
    if(stat != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "CUBLAS initialization failed\n");
        return NULL;
    }

    fscanf(file, "micro neural network\n");
    fscanf(file, "ninputs: %d\nnoutputs: %d\nnhidden %d\n", &net->nin, &net->nout, &net->nhidden);
    fscanf(file, "chidden:");

    net->weights = malloc(sizeof(micronn_matrix*) * (net->nhidden + 1));
    net->chidden = malloc(sizeof(uint) * net->nhidden);

    for(i = 0; i < net->nhidden; i++) {
        fscanf(file, " %d", &net->chidden[i]);
    }
    fscanf(file, "\n");
    for(i = 0; i <= net->nhidden; i++) {
        fscanf(file, "weight %d:\n", &tmp);
        net->weights[i] = micronn_matrix_read(file);
    }
    return net;
}
Developer: microo8, Project: micronn, Lines: 30, Source: micronn.c

Example 5: blasx_resource_init

void blasx_resource_init(int GPUs, cublasHandle_t* handles, cudaStream_t* streams, cudaEvent_t* events, void** C_dev, int floatType_id) {
    if(floatType_id == 0) C_dev = (float**)  C_dev;
    else if(floatType_id == 1) C_dev = (double**) C_dev;
    else              C_dev = (cuDoubleComplex**) C_dev;
    int GPU_id = 0;
    for (GPU_id = 0; GPU_id < GPUs; GPU_id++) {
        assert( cudaSetDevice(GPU_id) == cudaSuccess );
        //create handles
        assert( cublasCreate(&handles[GPU_id]) == CUBLAS_STATUS_SUCCESS);
        //create streams and event
        int i = 0;
        for (i = 0 ; i < STREAMNUM; i++) {
            assert( cudaStreamCreate(&streams[i+GPU_id*STREAMNUM]) == cudaSuccess );
            assert( cudaEventCreateWithFlags(&events[i+GPU_id*STREAMNUM], cudaEventDisableTiming) == cudaSuccess );
        }
        //create C_dev
        for (i = 0; i < STREAMNUM*2; i++) {
            if (floatType_id == 0) {
                assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(float)*BLOCKSIZE_SGEMM*BLOCKSIZE_SGEMM) == cudaSuccess );
            }else if (floatType_id == 1) {
                 assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(double)*BLOCKSIZE_DGEMM*BLOCKSIZE_DGEMM) == cudaSuccess );
            } else {
                 assert( cudaMalloc((void**)&C_dev[i+GPU_id*STREAMNUM*2], sizeof(cuDoubleComplex)*BLOCKSIZE_ZGEMM*BLOCKSIZE_ZGEMM) == cudaSuccess );
            }
        }
    }
}
Developer: 529038378, Project: BLASX, Lines: 27, Source: blasx_internal.c

Example 6: gpu_cublas1

void gpu_cublas1(double *A, double *B, double *C, double *D, double *r, double *nrmC, int N, int N2)
{
	#pragma acc data present(A, B, C, D)
	{
		#pragma acc host_data use_device(A, B, C, D)
		{
			cublasHandle_t handle;
			cublasCreate(&handle);
			const double alpha = 1.0;
			const double beta = 0.0;
			cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N);
			printf(" gpu gemm success \n");
			cublasDdot(handle, N2, C, 1, B, 1, r);
			printf(" gpu dot success \n");
			*r = -1.0 * *r;
			cublasDaxpy(handle, N2, r, B, 1, C, 1);
			printf(" gpu axpy success \n");
			cublasDnrm2(handle, N2, C, 1, nrmC);
			printf(" gpu nrm2 success \n");
			cublasDcopy(handle, N2, C, 1, D, 1);
			printf(" gpu copy success \n");
			*nrmC = 1.0 / *nrmC;
			cublasDscal(handle, N2, nrmC, D, 1);
			printf(" gpu scal success \n");
			cublasDestroy(handle);
			printf(" gpu destroy success \n");
		}
	}
}
Developer: xflying777, Project: OpenAcc, Lines: 29, Source: cublas_level_1.cpp

Example 7: CUDA_CHECK

void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().cusparse_descr_) CUSPARSE_CHECK(cusparseDestroyMatDescr(Get().cusparse_descr_));
  if (Get().cusparse_handle_) CUSPARSE_CHECK(cusparseDestroy(Get().cusparse_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUSPARSE_CHECK(cusparseCreate(&Get().cusparse_handle_));
  CUSPARSE_CHECK(cusparseCreateMatDescr(&Get().cusparse_descr_));
//  cusparseSetMatType(cusparse_descr_,CUSPARSE_MATRIX_TYPE_GENERAL);
//  cusparseSetMatIndexBase(cusparse_descr_,CUSPARSE_INDEX_BASE_ZERO);
  LOG(INFO)<<"set descr";
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
Developer: ZhouYuSong, Project: caffe-pruned, Lines: 26, Source: common.cpp

Example 8: mpla_init_instance_block_rows

void mpla_init_instance_block_rows(struct mpla_instance* instance, MPI_Comm comm)
{
        instance->comm = comm;

        // get number of process
        MPI_Comm_size(comm, &(instance->proc_count));

        // find number of current process
        MPI_Comm_rank(comm, &(instance->cur_proc_rank));
        if (instance->cur_proc_rank==0)
                instance->is_parent = true;
        else
                instance->is_parent = false;

        // compute the process grid, enforcing only a parallelization over rows
        int dims[2];
        dims[0]=instance->proc_count;
        dims[1]=1;
        MPI_Dims_create(instance->proc_count, 2, dims);
        instance->proc_rows = dims[0];
        instance->proc_cols = dims[1];

        // create cartesian communicator and retrieve cartesian coordinates
        int periods[2];
        periods[0]=periods[1]=0;
        MPI_Cart_create(comm, 2, dims, periods, 0, &(instance->comm));
        int cur_proc_coord[2];
        MPI_Cart_get(instance->comm, 2, dims, periods, cur_proc_coord);
        instance->cur_proc_row = cur_proc_coord[0];
        instance->cur_proc_col = cur_proc_coord[1];

        cublasCreate(&(instance->cublas_handle));

}
Developer: zaspel, Project: MPLA, Lines: 34, Source: mpla.cpp

Example 9: GPUsgemv

void GPUsgemv(int gpuInner, int Md,int Nd,int Kd,float* Adevice,float *Bdevice,float *Cdevice,float *Ahost,float *Bhost,float *Chost, cudaStream_t *stream) {
  cudaError_t error;
  int memSizeA = sizeof(float)*Md*Nd;
  int memSizeB = sizeof(float)*Nd;
  int memSizeC = sizeof(float)*Md;

  error = cudaMemcpyAsync(Adevice,Ahost,memSizeA,cudaMemcpyHostToDevice,*stream); if (error != cudaSuccess){printf("cudaMemcpy A returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}
  error = cudaMemcpyAsync(Bdevice,Bhost,memSizeB,cudaMemcpyHostToDevice,*stream); if (error != cudaSuccess){printf("cudaMemcpy B returned error code %d, line(%d)\n", error, __LINE__);exit(EXIT_FAILURE);}

  // setup execution parameters
  dim3 threads(block_size,block_size);
  dim3 grid(Nd/threads.x,Md/threads.y);

  // inside CUBLAS
  cublasHandle_t handle;
  cublasStatus_t ret;
  ret = cublasCreate(&handle); if (ret != CUBLAS_STATUS_SUCCESS){printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);exit(EXIT_FAILURE);}
  const float alpha = 1.0f;
  const float beta  = 0.0f;
  cublasSetStream(handle,*stream);
  for (int i = 0; i < gpuInner; i++) {
    ret = cublasSgemv(handle, CUBLAS_OP_N, Md, Nd, &alpha, Adevice, Md, Bdevice, 1, &beta, Cdevice, 1);
    if (ret != CUBLAS_STATUS_SUCCESS) {
      printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
      exit(EXIT_FAILURE);
    }
  }
  // done CUBLAS

  // copy result back to host
  error = cudaMemcpyAsync(Chost,Cdevice,memSizeC,cudaMemcpyDeviceToHost,*stream);
  //  printf("GPU Iter queued\n");
}
Developer: agearh, Project: dissertation, Lines: 33, Source: sgemv_hetero_blas.cpp

Example 10: contractTT

void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
	cublasHandle_t handle;
	cublasCreate(&handle);
	type result=0;

	sTensorGPU temp1 = emptyTensor(size*size,2);
	sTensorGPU temp2 = emptyTensor(size*size*2,3);
	cudaEvent_t start;
	cudaEventCreate(&start);
	cudaEvent_t stop;
	cudaEventCreate(&stop);

	//printf("Start contractTT\n");

	cudaEventRecord(start, NULL);
	int indA = TT1[0].size[0];
	int indB = TT2[0].size[0];

	sTensorCPU tt1start = copyToCPU(TT1[0]);
	sTensorCPU tt2start = copyToCPU(TT2[0]);
	sTensorCPU tt1end = copyToCPU(TT1[n - 1]);
	sTensorCPU tt2end = copyToCPU( TT2[n - 1]);


	for (int i = 0; i < indA; i++){
		TT1[0] = prepareTensorStart(tt1start, i);
		TT1[n - 1] = prepareTensorEnd(tt1end, i);
		for (int j = 0; j < indB; j++){
			TT2[0] = prepareTensorStart(tt2start, j);
			TT2[n - 1] = prepareTensorEnd(tt2end, j);
			contractTensor(handle, TT1[0], TT2[0], temp1);
			for (int i = 1; i < n; i++){
				contractTensor(handle, temp1, TT1[i], temp2);
				contractTensor(handle, temp2, TT2[i], temp1, 2);
			}
			type add = 0;
			cudaMemcpy(&add, temp1.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
			//printf("%e ", add);
			result += add;
		}
	}
	cudaEventRecord(stop, NULL);
	cudaEventSynchronize(stop);
	
	float msecTotal = 0.0f;
	cudaEventElapsedTime(&msecTotal, start, stop);
	printf("Time: %.3fms\n", msecTotal);
	printf("Ops: %.0f\n", bops);
	double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
	printf("Perf= %.2f GFlop/s\n", gigaFlops);

	cublasDestroy(handle);
	cudaDeviceReset();

	printf("%.5e \n", result);
	exit(0);
}
Developer: thomas-hoer, Project: cuTT, Lines: 58, Source: bigSizeTensors.cpp

Example 11: cublasCreate

cublasHandle_t CudaUtil::cublasInit()
{
	cublasHandle_t handle;
	cublasStatus_t status = cublasCreate(&handle);
	if (status != CUBLAS_STATUS_SUCCESS) {
		throw CudaException("CUBLAS initialisation error");
	}
	return handle;
}
Developer: onedigit, Project: org.onedigit.cuda.cpp, Lines: 9, Source: CudaUtil.cpp

Example 12: device

GpuDevice::Impl::Impl(int d) : device(d) {
  ActivateDevice();
  for (size_t i = 0; i < kParallelism; ++i) {
    CUDA_CALL(cudaStreamCreate(&stream[i]));
    CUBLAS_CALL(cublasCreate(&cublas_handle[i]));
    CUBLAS_CALL(cublasSetStream(cublas_handle[i], stream[i]));
    CUDNN_CALL(cudnnCreate(&cudnn_handle[i]));
    CUDNN_CALL(cudnnSetStream(cudnn_handle[i], stream[i]));
  }
}
Developer: AI42, Project: minerva, Lines: 10, Source: device.cpp

Example 13: initCublasHandle

void initCublasHandle(cublasHandle_t* handle)
{
	cublasStatus_t stat;
	stat = cublasCreate(handle);
	if (stat != CUBLAS_STATUS_SUCCESS)
	{
		printf("CUBLAS INITIALIZATION FAILED");
	}

}
Developer: Tifuera, Project: linear_eq_symm_band_matrix, Lines: 10, Source: gpu_util.cpp

Example 14: mode_

Caffe::Caffe()
  : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL),
  curand_generator_(NULL), vsl_stream_(NULL)
{
  CUBLAS_CHECK(cublasCreate(&cublas_handle_));
  //TODO: original caffe code has bug here!
  CURAND_CHECK(curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator_, 1701ULL));
  VSL_CHECK(vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, 1701));
}
Developer: xiaobinxu, Project: recaffe, Lines: 10, Source: common.cpp

Example 15: init

/*init cublas Handle*/
bool init(cublasHandle_t& handle)
{
	cublasStatus_t stat;
	stat = cublasCreate(&handle);
	if(stat != CUBLAS_STATUS_SUCCESS) {
		printf ("init: CUBLAS initialization failed\n");
		exit(0);
	}
	return true;
}
Developer: Donkey-Sand, Project: CUDA-CNN, Lines: 11, Source: main.cpp


Note: The cublasCreate examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution or use should follow each project's license. Do not reproduce without permission.