

C++ cudaFree Function Code Examples

This article collects typical usage examples of the C++ cudaFree function. If you are wondering what cudaFree is used for, or how it is called in real code, the curated examples below should help.


The following shows 15 code examples of the cudaFree function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better C++ code examples.
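
To set the stage, here is a minimal, self-contained sketch of the allocate/use/free pattern that all of the snippets below follow. The buffer name d_buf and the element count are illustrative only:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

int main()
{
    float *d_buf = NULL;
    const size_t n = 1024;

    /* Allocate device memory. */
    cudaError_t err = cudaMalloc((void **)&d_buf, n * sizeof(float));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }

    /* ... launch kernels that read and write d_buf ... */

    /* Release the device memory; cudaFree(NULL) is a harmless no-op. */
    err = cudaFree(d_buf);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(err));
        return EXIT_FAILURE;
    }
    d_buf = NULL; /* avoid a dangling pointer and an accidental double free */

    return EXIT_SUCCESS;
}

Like every CUDA runtime call, cudaFree returns an error code, which is why several of the examples below wrap it in a checking macro such as CUDA_CHECK, CUDA_SAFE_CALL or gpuErrchk.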

Example 1: quantus_cuda_cleanup

void quantus_cuda_cleanup(quantus_comm<T> *comm)
{
    cudaFree((T *) comm->matrix);
}
Developer ID: thomasluu, Project: quantus, Lines: 4, Source: quantus_cuda.cpp

Example 2: CUDA_CHECK

GPUParams<Dtype>::~GPUParams() {
#ifndef CPU_ONLY
  CUDA_CHECK(cudaFree(data_));
  CUDA_CHECK(cudaFree(diff_));
#endif
}
Developer ID: bbshocking, Project: caffe, Lines: 6, Source: parallel.cpp

Example 3: free

	static void free(void *data) {
	    if (data) {
		// std::cout << "free " << data << std::endl;
		throw_(cudaFree(data));
	    }
	}
Developer ID: asadchev, Project: asadchev, Lines: 6, Source: allocator.hpp

Example 4: cudaFree

TxVectorOptimizationDataCU::~TxVectorOptimizationDataCU() {
  if (devicePtr) {
    cudaFree(devicePtr);
  }
}
Developer ID: NobodyInAmerica, Project: libTxHPCG, Lines: 5, Source: TxVectorOptimizationDataCU.cpp

Example 5: cudaFree

void CloudConstructor::freeGPUPoints() {
	cudaFree(d_resultPoints);
	d_resultPoints = NULL;
}
Developer ID: damonseeley, Project: electroland_repos, Lines: 4, Source: CloudConstructor.cpp

Example 6: main

int
main()
{
	int i;
	struct timeval start, stop;
	FILE *fd;
	char *key;

	cudaSetDevice(0);

	/* Allocate memory */
	if ((key = (char *)malloc(40 * sizeof(char))) == NULL) {
		printf("Malloc failed!\n");
		exit(EXIT_FAILURE);
	}

	cudaMallocHost((void **) &batchKeys,
	    ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char));
	cudaMallocHost((void **) &nKeys, BATCH_SIZE * sizeof(size_t));
	cudaMallocHost((void **) &batchIndex, (BATCH_SIZE + 1) * sizeof(int));
	cudaMallocHost((void **) &hashedKeys, BATCH_SIZE * sizeof(uint32_t));

	cudaMalloc((void **) &d_keys,
	    ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char));
	cudaMalloc((void **) &d_len, BATCH_SIZE * sizeof(size_t));
	cudaMalloc((void **) &d_index, (BATCH_SIZE + 1) * sizeof(int));
	cudaMalloc((void **) &d_res, BATCH_SIZE * sizeof(uint32_t));

	/* Create 'BATCH_SIZE' number of random keys 
	 * and add them to batch table
	 */
	batchNo = 0;
	batchIndex[0] = 0;
	for(i = 0; i < BATCH_SIZE; i++) { 
		gen_random(key, 30);
		add_to_batch(key, 30);
	}

	/* Start Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM
	
	/* MemCpy Host -> Device */
	cudaMemcpy(d_keys, batchKeys, (batchIndex[BATCH_SIZE-1] +
	    strlen(&batchKeys[batchIndex[BATCH_SIZE - 1]])) * sizeof(char),
	    cudaMemcpyHostToDevice);
	cudaMemcpy(d_len, nKeys, BATCH_SIZE * sizeof(size_t),
	    cudaMemcpyHostToDevice);
	cudaMemcpy(d_index, batchIndex, BATCH_SIZE * sizeof(int),
	    cudaMemcpyHostToDevice);

	/* Start Time (execution only)*/
#ifndef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM

	/* Call the kernel */
	CUDAhash(d_keys, d_index, d_len, d_res);

	/* Start Time (execution only)*/
#ifndef EXEC_MEM
	cudaDeviceSynchronize();
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

	/* MemCpy Device -> Host */
	cudaMemcpy(hashedKeys, d_res, BATCH_SIZE * sizeof(uint32_t),
	    cudaMemcpyDeviceToHost);	
	
	/* Start Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

	
#ifdef DEBUG
	for(i = 0; i < BATCH_SIZE; i++) {
		printf("%s\n", &batchKeys[batchIndex[i]]);
		printf("%u\n", hashedKeys[i]);
	}
#endif // DEBUG

	/* Print Time */
	fd = fopen("log.txt", "a+");
	fprintf(fd, "%lu", ((stop.tv_sec * USECS) + stop.tv_usec ) -
	    ((start.tv_sec * USECS) + start.tv_usec));
	fprintf(fd, "\t%1.f\n", ((double)BATCH_SIZE / 
	    ((double)(((stop.tv_sec * USECS) + stop.tv_usec ) -
	    ((start.tv_sec * USECS) + start.tv_usec)) / 1000000 )) / 1000);
	fclose(fd);

#ifdef DEBUG
	printf("Time: %lu \n", ((stop.tv_sec * USECS) + stop.tv_usec ) -
	    ((start.tv_sec * USECS) + start.tv_usec));
#endif // DEBUG
	
        /* Free memory */
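	/* Note: batchKeys, nKeys, batchIndex and hashedKeys were allocated with
	 * cudaMallocHost, so the matching release call for them is cudaFreeHost;
	 * cudaFree is only correct for the cudaMalloc'd device buffers
	 * (d_keys, d_len, d_index, d_res). */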
        cudaFree(batchKeys);
	cudaFree(nKeys);
//......... part of the code omitted here .........
Developer ID: deyannis, Project: HY527, Lines: 101, Source: hash.c

Example 7: CUDA_SAFE_CALL

void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const
{
  try {
    CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) );
  } catch(...) {}
}
Developer ID: gmackey, Project: kokkos, Lines: 6, Source: Kokkos_CudaSpace.cpp

Example 8: calculateOnGPU


//......... part of the code omitted here .........
    // move constant variables to CUDA constant memory
    setConstants(partSeqSize, partsNumber, overlapLength, seqLibLength,
                queryLength, gapOpen, gapExtension, maxScore, partQuerySize,
                U2::SmithWatermanAlgorithm::UP, U2::SmithWatermanAlgorithm::LEFT, U2::SmithWatermanAlgorithm::DIAG,
                U2::SmithWatermanAlgorithm::STOP);

    size_t sh_mem_size = sizeof(ScoreType) * (dimGrid.x + 1) * 3;
    u2log.details(QString("SHARED MEM SIZE USED: %1 B").arg(sh_mem_size));
    // start main loop
    for (int i = 0; i < queryDevider; i++) {

        calculateMatrix_wrap( dimBlock.x, dimGrid.x, g_seqLib,
            g_queryProfile, g_HdataUp, g_HdataRec, g_HdataMax,
            g_FdataUp, g_directionsUp, g_directionsRec,
            g_directionsMax, i * partQuerySize, g_directionsMatrix, g_backtraceBegins);

        cudaError hasErrors = cudaThreadSynchronize();

        if (hasErrors != 0) {
            u2log.trace(QString("CUDA ERROR HAPPEN, errorId: ") + QString::number(hasErrors));
        }

        //revert arrays
        g_HdataTmp = g_HdataRec;
        g_HdataRec = g_HdataUp;
        g_HdataUp = g_HdataTmp;

        g_HdataTmp = g_directionsRec;
        g_directionsRec = g_directionsUp;
        g_directionsUp = g_HdataTmp;
    }

    //Copy vectors on host and find actual results
    cudaMemcpy(tempRow, g_HdataMax, sizeQQ, cudaMemcpyDeviceToHost);
    cudaMemcpy(directionRow, g_directionsMax, sizeQQ, cudaMemcpyDeviceToHost);
    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaMemcpy(globalMatrix, g_directionsMatrix, directionMatrixSize, cudaMemcpyDeviceToHost);
        cudaMemcpy(backtraceBegins, g_backtraceBegins, backtraceBeginsSize, cudaMemcpyDeviceToHost);
    }

    QList<resType> pas;
    resType res;
    for (int j = 0; j < (sizeRow); j++) {
        if (tempRow[j] >= maxScore) {
            res.refSubseq.startPos = directionRow[j];
            res.refSubseq.length = j - res.refSubseq.startPos + 1 - (j) / (partSeqSize + 1) * overlapLength - (j) / (partSeqSize + 1);
            res.score = tempRow[j];
            if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
                qint32 pairAlignOffset = 0;

                qint32 row = backtraceBegins[2 * j];
                qint32 column = backtraceBegins[2 * j + 1];
                while(U2::SmithWatermanAlgorithm::STOP != globalMatrix[seqLibLength * row + column]) {
                    if(U2::SmithWatermanAlgorithm::DIAG == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::DIAG;
                        row--;
                        column--;
                    } else if(U2::SmithWatermanAlgorithm::LEFT == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::UP;
                        column--;
                    } else if(U2::SmithWatermanAlgorithm::UP == globalMatrix[seqLibLength * row + column]) {
                        res.pairAlign[pairAlignOffset++] = U2::SmithWatermanAlgorithm::LEFT;
                        row--;
                    }
                    if(0 >= row || 0 >= column) {
                        break;
                    }
                }
                res.patternSubseq.startPos = row;
                res.patternSubseq.length = backtraceBegins[2 * j] - row + 1;
            }

            pas.append(res);
        }
    }

    // deallocate device memory
    cudaFree(g_seqLib);
    cudaFree(g_queryProfile);
    cudaFree(g_HdataMax);
    cudaFree(g_HdataUp);
    cudaFree(g_HdataRec);
    cudaFree(g_FdataUp);
    cudaFree(g_directionsUp);
    cudaFree(g_directionsMax);
    cudaFree(g_directionsRec);

    if(U2::SmithWatermanSettings::MULTIPLE_ALIGNMENT == resultView) {
        cudaFree(g_directionsMatrix);
        cudaFree(g_backtraceBegins);
    }

    delete[] tempRow;
    delete[] directionRow;
    delete[] zerroArr;
    delete[] globalMatrix;
    delete[] backtraceBegins;

    return pas;
}
Developer ID: ugeneunipro, Project: ugene, Lines: 101, Source: sw_cuda_cpp.cpp

Example 9:

 ~curandStateManager()
 {
     //if(_state != NULL) memFree((char*)_state);
     if(_state != NULL) CUDA_CHECK(cudaFree(_state));
 }
Developer ID: hxiaox, Project: arrayfire, Lines: 5, Source: random.hpp

Example 10: sci_gpuLU


//......... part of the code omitted here .........
                default : throw "First option argument must be 0 or 1 or 2.";
            }

            switch((int)option[1])
            {
                case 0 :    // Don't keep the data input on Device.
                {
                    if(inputType_A == sci_matrix)
                    {
                        status = cublasFree(d_A);
                        if (status != CUBLAS_STATUS_SUCCESS) throw status;
                        d_A = NULL;
                    }
                    break;
                }
                case 1 :    // Keep data of the first argument on Device and return the Device pointer.
                {
                    if(inputType_A == sci_matrix)
                    {
                        gpuMat_CUDA* dptr;
                        gpuMat_CUDA tmp={getCudaContext()->genMatrix<double>(getCudaQueue(),rows_A*cols_A),rows_A,cols_A};
                        dptr=new gpuMat_CUDA(tmp);
                        dptr->useCuda = true;
                        dptr->ptr->set_ptr((double*)d_A);
                        if(bComplex_A)
                            dptr->complex=TRUE;
                        else
                            dptr->complex=FALSE;

                        sciErr = createPointer(pvApiCtx,Rhs+posOutput, (void*)dptr);
                        if(sciErr.iErr) throw sciErr;
                        LhsVar(posOutput)=Rhs+posOutput;
                    }
                    else
                        throw "The first input argument is already a GPU variable.";

                    posOutput++;
                    break;
                }

                default : throw "Second option argument must be 0 or 1.";
            }
            // Shutdown
            status = cublasShutdown();
            if (status != CUBLAS_STATUS_SUCCESS) throw status;
        }
        #endif

        #ifdef WITH_OPENCL
        if (!useCuda())
        {
            throw "not implemented with OpenCL.";
        }
        #endif
        if(Rhs == 1)
        {
            free(option);
            option = NULL;
        }

        if(posOutput < Lhs+1)
            throw "Too many output arguments.";

        if(posOutput > Lhs+1)
            throw "Too few output arguments.";

        PutLhsVar();
        return 0;
    }
    catch(const char* str)
    {
        Scierror(999,"%s\n",str);
    }
    catch(SciErr E)
    {
        printError(&E, 0);
    }
    #ifdef WITH_CUDA
    catch(cudaError_t cudaE)
    {
        GpuError::treat_error<CUDAmode>((CUDAmode::Status)cudaE);
    }
    catch(cublasStatus CublasE)
    {
        GpuError::treat_error<CUDAmode>((CUDAmode::Status)CublasE,1);
    }
    if (useCuda())
    {
        if(inputType_A == 1 && d_A != NULL) cudaFree(d_A);
    }
    #endif
    #ifdef WITH_OPENCL
    if (!useCuda())
    {
        Scierror(999,"not implemented with OpenCL.\n");
    }
    #endif
    if(Rhs == 1 && option != NULL) free(option);
    return EXIT_FAILURE;
}
Developer ID: dawuweijun, Project: scigpgpu, Lines: 101, Source: sci_gpuLU.cpp

Example 11: main


//......... part of the code omitted here .........
    {
        fprintf(stderr, "!!!! device access error (write C)\n");
        return EXIT_FAILURE;
    }

    /* Performs operation using plain C code */
    simple_sgemm(N, alpha, h_A, h_B, beta, h_C);
    h_C_ref = h_C;

    /* Performs operation using cublas */
    status = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, d_A, N, d_B, N, &beta, d_C, N);

    if (status != CUBLAS_STATUS_SUCCESS)
    {
        fprintf(stderr, "!!!! kernel execution error.\n");
        return EXIT_FAILURE;
    }

    /* Allocate host memory for reading back the result from device memory */
    h_C = (float *)malloc(n2 * sizeof(h_C[0]));

    if (h_C == 0)
    {
        fprintf(stderr, "!!!! host memory allocation error (C)\n");
        return EXIT_FAILURE;
    }

    /* Read the result back */
    status = cublasGetVector(n2, sizeof(h_C[0]), d_C, 1, h_C, 1);

    if (status != CUBLAS_STATUS_SUCCESS)
    {
        fprintf(stderr, "!!!! device access error (read C)\n");
        return EXIT_FAILURE;
    }

    /* Check result against reference */
    error_norm = 0;
    ref_norm = 0;

    for (i = 0; i < n2; ++i)
    {
        diff = h_C_ref[i] - h_C[i];
        error_norm += diff * diff;
        ref_norm += h_C_ref[i] * h_C_ref[i];
    }

    error_norm = (float)sqrt((double)error_norm);
    ref_norm = (float)sqrt((double)ref_norm);

    if (fabs(ref_norm) < 1e-7)
    {
        fprintf(stderr, "!!!! reference norm is 0\n");
        return EXIT_FAILURE;
    }

    /* Memory clean up */
    free(h_A);
    free(h_B);
    free(h_C);
    free(h_C_ref);

    if (cudaFree(d_A) != cudaSuccess)
    {
        fprintf(stderr, "!!!! memory free error (A)\n");
        return EXIT_FAILURE;
    }

    if (cudaFree(d_B) != cudaSuccess)
    {
        fprintf(stderr, "!!!! memory free error (B)\n");
        return EXIT_FAILURE;
    }

    if (cudaFree(d_C) != cudaSuccess)
    {
        fprintf(stderr, "!!!! memory free error (C)\n");
        return EXIT_FAILURE;
    }

    /* Shutdown */
    status = cublasDestroy(handle);

    if (status != CUBLAS_STATUS_SUCCESS)
    {
        fprintf(stderr, "!!!! shutdown error (A)\n");
        return EXIT_FAILURE;
    }

    if (error_norm / ref_norm < 1e-6f)
    {
        printf("simpleCUBLAS test passed.\n");
        exit(EXIT_SUCCESS);
    }
    else
    {
        printf("simpleCUBLAS test failed.\n");
        exit(EXIT_FAILURE);
    }
}
Developer ID: intersense, Project: ox-cuda, Lines: 101, Source: simpleCUBLAS.cpp

Example 12: main


//......... part of the code omitted here .........
			
			int sem_status = sem_wait(sem1);
			if (sem_status == -1)
			{
				fprintf(stderr, "Cannot wait on semaphore #1 by process %d, errno = %d\n",
					pid, errno);
				return errno;
			}			

			sem_status = sem_post(sem2);
			if (sem_status == -1)
			{
				fprintf(stderr, "Cannot post on semaphore #2 by process %d, errno = %d\n",
					pid, errno);
				return errno;
			}
		}

		// At this point two processes are synchronized.

		config.step++;
		
		// Reassign processes' input data segments to show some
		// possible manipulation on shared memory.
		// Here we perform cyclic shift of data pointers.
		config.idevice++;
		config.idevice %= ndevices + 1;
		config.inout_cpu = inout +  config.idevice * np;
	}

	// Release device buffers.
	if (worker)
	{
		cuda_status = cudaFree(config.in_dev);
		if (cuda_status != cudaSuccess)
		{
			fprintf(stderr, "Cannot release input buffer by process %d, status = %d\n",
				pid, cuda_status);
			return cuda_status;
		}
		cuda_status = cudaFree(config.out_dev);
		if (cuda_status != cudaSuccess)
		{
			fprintf(stderr, "Cannot release output buffer by process %d, status = %d\n",
				pid, cuda_status);
			return cuda_status;
		}
	}
	else
	{
		free(config.in_dev);
		free(config.out_dev);
	}
	
	printf("Device %d deinitialized py process %d\n", config.idevice, pid);

	// On master process perform results check:
	// compare each GPU result to CPU result.
	if (master)
	{
		float* control = inout + np * ndevices;
		for (int idevice = 0; idevice < ndevices; idevice++)
		{
			// Find the maximum abs difference.
			int maxi = 0, maxj = 0;
			float maxdiff = fabs(control[0] - (inout + idevice * np)[0]);
Developer ID: 7633, Project: msu-cuda-course, Lines: 67, Source: shmem_mmap_cuda.c

Example 13:

OsdCudaTable::~OsdCudaTable() {

    if (_devicePtr) cudaFree(_devicePtr);
}
Developer ID: Len3d, Project: OpenSubdiv, Lines: 4, Source: cudaComputeContext.cpp

Example 14: main

int main(int argc, char *argv[])
{
    // needed to work correctly with piped benchmarkrunner
    setlinebuf(stdout);
    setlinebuf(stdin);

    int n_indices = 1;
    int n_dimensions = 1;
    char inBuf[200]; // ridiculously large input buffer.
    
    bool isFirst = true;

  do {

    // Allocate memory for the arrays
    int *h_indices = 0;
    double        *h_outputGPU  = 0;

    try
    {
        h_indices = new int [n_indices * n_dimensions];
        h_outputGPU  = new double [n_indices * n_dimensions];
    }
    catch (std::exception e)
    {
        std::cerr << "Caught exception: " << e.what() << std::endl;
        std::cerr << "Unable to allocate CPU memory (try running with fewer vectors/dimensions)" << std::endl;
        return -1;
    }

    int *d_indices;
    double        *d_output;

    try
    {
        cudaError_t cudaResult;
        cudaResult = cudaMalloc((void **)&d_indices, n_dimensions * n_indices * sizeof(int));

        if (cudaResult != cudaSuccess)
        {
            throw std::runtime_error(cudaGetErrorString(cudaResult));
        }
    }
    catch (std::runtime_error e)
    {
        std::cerr << "Caught exception: " << e.what() << std::endl;
        std::cerr << "Unable to allocate GPU memory (try running with fewer vectors/dimensions)" << std::endl;
        return -1;
    }

    // Initialize the indices (done on the host)
    for(int i = 0; i < n_indices; i++) {
      h_indices[i] = i;
    }

    // Copy the indices to the device
    cudaMemcpy(d_indices, h_indices, n_dimensions * n_indices * sizeof(int), cudaMemcpyHostToDevice);
    cudaDeviceSynchronize();

    // Execute the QRNG on the device
    int n_vec;
    sobol_nikola_unsimplified(n_indices, d_indices, n_indices, &d_output, &n_vec);

    cudaDeviceSynchronize();

    cudaMemcpy(h_outputGPU, d_output, n_indices * n_dimensions * sizeof(double), cudaMemcpyDeviceToHost);

    // Cleanup and terminate
    delete[] h_indices;
    cudaFree(d_indices);
    cudaFree(d_output);

    if(!isFirst) {
      printf("RESULT ");

      for(int i = 0; i < std::min(n_indices,10); i++)
        printf("%f ", h_outputGPU[i]);

      printf("\n");
    }
    else {
      printf("OK\n");
      isFirst = false;
    }

    delete[] h_outputGPU;

      fgets(inBuf, 200, stdin);

      if (sscanf(inBuf, "%u", &n_indices) == 0)
      {
        // if input is not a number, it has to be "EXIT"
        if (strncmp("EXIT",inBuf,4)==0)
        {
          printf("OK\n");
          break;
        }
        else
        {
          printf("ERROR. Bad input: %s\n", inBuf);
//......... part of the code omitted here .........
Developer ID: HIPERFIT, Project: vectorprogramming, Lines: 101, Source: sobol.cpp

Example 15: gpuErrchk

PhysicsProcessor::~PhysicsProcessor(void)
{
	gpuErrchk(cudaFree(d_V));
}
Developer ID: Aloalo, Project: RTRT, Lines: 4, Source: PhysicsProcessor.cpp


Note: The cudaFree examples in this article were collected and organized by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers; the copyright of the source code belongs to the original authors, and any distribution or use should follow the License of the corresponding project. Do not reproduce without permission.