This page collects typical usage examples of the Java method jcuda.runtime.JCuda.cudaMalloc. If you are unsure what JCuda.cudaMalloc does or how to call it, the curated code samples below should help; they are also a convenient starting point for exploring the enclosing class, jcuda.runtime.JCuda.
Thirteen code examples of the JCuda.cudaMalloc method are shown below, ordered by popularity by default.
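Before the individual examples, here is a minimal, self-contained sketch of the usual allocate, copy, use, free cycle. It relies only on classes that already appear in the examples below (Pointer, Sizeof, cudaMemcpyKind); enabling exceptions is optional but turns silent error codes into CudaExceptions.

import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.runtime.JCuda;
import jcuda.runtime.cudaMemcpyKind;

public class CudaMallocSketch {
    public static void main(String[] args) {
        // Let JCuda throw CudaException instead of silently returning error codes.
        JCuda.setExceptionsEnabled(true);

        float[] host = new float[1024];
        Pointer device = new Pointer();

        // Allocate room for 1024 floats on the device.
        JCuda.cudaMalloc(device, host.length * Sizeof.FLOAT);

        // Copy host -> device, then back device -> host.
        JCuda.cudaMemcpy(device, Pointer.to(host), host.length * Sizeof.FLOAT,
                cudaMemcpyKind.cudaMemcpyHostToDevice);
        JCuda.cudaMemcpy(Pointer.to(host), device, host.length * Sizeof.FLOAT,
                cudaMemcpyKind.cudaMemcpyDeviceToHost);

        // Device memory is not garbage collected; free it explicitly.
        JCuda.cudaFree(device);
    }
}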
Example 1: main2
import jcuda.runtime.JCuda; // import the package/class the method depends on
/**
* @param args
*/
public static void main2(String[] args) {
Pointer devPtr = new Pointer();
JCuda.cudaMalloc(devPtr, 1024 * 1024 * 1024);
logger.info("Pointer: "+devPtr);
JCuda.cudaFree(devPtr);
}
Example 2: Matrix
import jcuda.runtime.JCuda; // import the package/class the method depends on
public Matrix(int rows, int cols) {
this.dontFree = false;
this.rows = rows;
this.cols = cols;
this.data_d = new Pointer();
JCuda.cudaMalloc(data_d, rows*cols * Sizeof.FLOAT);
CublasUtil.allocated.add(this);
}
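Each Matrix registers itself in CublasUtil.allocated, which suggests the device buffers are released in bulk elsewhere. A hypothetical cleanup method is sketched below; the free() name is an assumption, and only dontFree, data_d, and cudaFree come from the constructor above.

public void free() {
    // Release the device buffer unless this instance was flagged to be kept alive.
    if (!dontFree) {
        JCuda.cudaFree(data_d);
    }
}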
Example 3: getrfGetriBatched
import jcuda.runtime.JCuda; // import the package/class the method depends on
private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
Pointer[] Apointers = new Pointer[A.size()];
Pointer[] Bpointers = new Pointer[B.size()];
for (int i=0; i<A.size(); ++i) {
Apointers[i] = A.get(i).data_d;
Bpointers[i] = B.get(i).data_d;
}
Pointer Apointers_d = new Pointer();
JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
Pointer Bpointers_d = new Pointer();
JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
Pointer info_d = new Pointer();
JCuda.cudaMalloc(info_d, A.size() * Sizeof.INT);
Pointer pivots_d = new Pointer();
JCuda.cudaMalloc(pivots_d, A.get(0).rows * A.size() * Sizeof.INT);
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
JCublas2.cublasSgetrfBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, info_d, A.size());
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
JCublas2.cublasSgetriBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, Bpointers_d, B.get(0).rows, info_d, A.size());
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
JCuda.cudaFree(Apointers_d);
JCuda.cudaFree(Bpointers_d);
JCuda.cudaFree(info_d);
JCuda.cudaFree(pivots_d);
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
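One detail worth noting in this and the next example: the batched cuBLAS routines (cublasSgetrfBatched, cublasSgetriBatched, cublasSgemmBatched) expect a device-side array of device pointers. That is why Apointers_d and the other pointer arrays are themselves allocated with cudaMalloc(count * Sizeof.POINTER) and filled via cudaMemcpy before the library call is made.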
Example 4: gemmBatched
import jcuda.runtime.JCuda; // import the package/class the method depends on
private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
Pointer[] Apointers = new Pointer[A.size()];
Pointer[] Bpointers = new Pointer[B.size()];
Pointer[] Cpointers = new Pointer[C.size()];
for (int i=0; i<A.size(); ++i) {
Apointers[i] = A.get(i).data_d;
Bpointers[i] = B.get(i).data_d;
Cpointers[i] = C.get(i).data_d;
}
Pointer Apointers_d = new Pointer();
JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
Pointer Bpointers_d = new Pointer();
JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
Pointer Cpointers_d = new Pointer();
JCuda.cudaMalloc(Cpointers_d, C.size() * Sizeof.POINTER);
JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), C.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
JCublas2.cublasSgemmBatched(cublasHandle, cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N, C.get(0).rows, C.get(0).cols, B.get(0).rows, Pointer.to(new float[] {alpha}), Apointers_d, A.get(0).rows, Bpointers_d, B.get(0).rows, Pointer.to(new float[] {beta}), Cpointers_d, C.get(0).rows, A.size());
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
JCuda.cudaFree(Apointers_d);
JCuda.cudaFree(Bpointers_d);
JCuda.cudaFree(Cpointers_d);
if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
Example 5: cudaMalloc
import jcuda.runtime.JCuda; // import the package/class the method depends on
/**
* Wraps JCuda.cudaMalloc: times the call, logs it via CuDNN.log, and passes the
* return code to handle().
*
* @param devPtr the pointer that receives the device address
* @param size the allocation size in bytes
* @return the raw cudaMalloc return code
*/
public static int cudaMalloc(final Pointer devPtr, final long size) {
long startTime = System.nanoTime();
final int result = JCuda.cudaMalloc(devPtr, size);
CuDNN.log("cudaMalloc", result, devPtr, size);
cudaMalloc_execution.accept((System.nanoTime() - startTime) / 1e9);
handle(result);
return result;
}
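A hypothetical call site for this wrapper is sketched below; CuDNN is the class shown above, while the buffer size and variable names here are made up.

// Allocate room for one million floats through the timed, logged wrapper.
Pointer buf = new Pointer();
int status = CuDNN.cudaMalloc(buf, 1_000_000L * Sizeof.FLOAT);
// handle(result) inside the wrapper has already reacted to a failure;
// status is still the raw return code if the caller wants to inspect it.
JCuda.cudaFree(buf);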
Example 6: CUDAVolume3D
import jcuda.runtime.JCuda; // import the package/class the method depends on
public CUDAVolume3D(int[] size2, float[] dim2, int inDim) {
super(size2, dim2, inDim);
int adaptedWidth = CUDAUtil.iDivUp(size[2], CUDAUtil.gridBlockSize[0]) * CUDAUtil.gridBlockSize[0];
int adaptedHeight = CUDAUtil.iDivUp(size[1], CUDAUtil.gridBlockSize[1]) * CUDAUtil.gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*size[0]* getInternalDimension() * Sizeof.FLOAT;
deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMemset(deviceX, 0, memorySize);
}
Example 7: copyToDeviceMemory
import jcuda.runtime.JCuda; // import the package/class the method depends on
/**
* copies an int array to the device and returns a pointer to the memory.
* @param data the int array
* @return the pointer to the device memory
*/
public static CUdeviceptr copyToDeviceMemory(int [] data){
int memorySize = data.length * Sizeof.INT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMemcpy(deviceX, Pointer.to(data), memorySize,
cudaMemcpyKind.cudaMemcpyHostToDevice);
return deviceX;
}
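A complementary helper for the reverse direction would look symmetric. The following is a hypothetical sketch, not a CUDAUtil method shown on this page; it reuses only calls already seen above (cudaMemcpy with cudaMemcpyDeviceToHost).

/**
 * Copies length ints back from device memory into a new host array.
 * Hypothetical counterpart to copyToDeviceMemory above.
 */
public static int[] copyFromDeviceMemory(CUdeviceptr deviceX, int length) {
    int[] data = new int[length];
    JCuda.cudaMemcpy(Pointer.to(data), deviceX, length * Sizeof.INT,
            cudaMemcpyKind.cudaMemcpyDeviceToHost);
    return data;
}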
Example 8: allocateSpace
import jcuda.runtime.JCuda; // import the package/class the method depends on
/**
* Allocates space on the CUDA device for a Volume3D
* @param vol the volume
* @return the pointer to the memory
*/
public static CUdeviceptr allocateSpace(Volume3D vol){
// We deliberately over-allocate: the kernels parallelize along the x and y directions, and internally the width and height must be multiples of the block size in those directions.
int adaptedWidth = iDivUp(vol.size[2], gridBlockSize[0]) * gridBlockSize[0];
int adaptedHeight = iDivUp(vol.size[1], gridBlockSize[1]) * gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* vol.getInternalDimension() * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
return deviceX;
}
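The padding arithmetic above is easiest to see with concrete numbers. The standalone sketch below assumes a block size of 16x16 and a 100x100x50 volume; neither value comes from the library, and iDivUp is re-implemented with the usual rounding-up formula.

public class PaddingSketch {
    // Same rounding helper that CUDAUtil.iDivUp is assumed to implement.
    static int iDivUp(int a, int b) { return (a + b - 1) / b; }

    public static void main(String[] args) {
        int[] gridBlockSize = {16, 16};   // assumed CUDA block size
        int[] size = {50, 100, 100};      // assumed volume: depth, height, width
        int adaptedWidth  = iDivUp(size[2], gridBlockSize[0]) * gridBlockSize[0]; // 112
        int adaptedHeight = iDivUp(size[1], gridBlockSize[1]) * gridBlockSize[1]; // 112
        long bytes = (long) adaptedWidth * adaptedHeight * size[0] * 4L;          // 4 = Sizeof.FLOAT
        System.out.println(adaptedWidth + " x " + adaptedHeight + " x " + size[0]
                + " floats -> " + bytes + " bytes");
    }
}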
Example 9: testGPUInstallation
import jcuda.runtime.JCuda; // import the package/class the method depends on
/**
* todo needs some more development/expansion
*/
private void testGPUInstallation(){
try {
jcuda.Pointer pointer = new jcuda.Pointer();
JCuda.cudaMalloc(pointer, 4);
JCuda.cudaFree(pointer);
}
catch (Exception e) {
System.err.println("GPU/CUDA Installation Not Detected");
System.err.println("Exiting HiCCUPS");
System.exit(24);
}
}
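Note that a missing native library typically surfaces as an UnsatisfiedLinkError, which catch (Exception e) does not intercept, and that JCuda only throws on a failed call once exceptions are enabled. A broadened probe might look like the sketch below; this is an assumption-laden variation, not the project's actual code.

// Sketch: catch Throwable so native-library load failures (Errors) are detected too,
// and enable JCuda exceptions so a failing cudaMalloc is reported as well.
private void testGPUInstallationSketch() {
    try {
        JCuda.setExceptionsEnabled(true);
        jcuda.Pointer pointer = new jcuda.Pointer();
        JCuda.cudaMalloc(pointer, 4);
        JCuda.cudaFree(pointer);
    }
    catch (Throwable t) {
        System.err.println("GPU/CUDA Installation Not Detected");
        System.err.println("Exiting HiCCUPS");
        System.exit(24);
    }
}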
Example 10: makeComplex
import jcuda.runtime.JCuda; // import the package/class the method depends on
@Override
public void makeComplex(Volume3D vol)
{
if (vol.getInternalDimension() == 2) return;
if (vol.getInternalDimension() != 1) {
fprintf("vol_make_comlex: Invalid dimension\n");
return;
}
initCUDA();
int adaptedWidth = CUDAUtil.iDivUp(vol.size[2], CUDAUtil.gridBlockSize[0]) * CUDAUtil.gridBlockSize[0];
int adaptedHeight = CUDAUtil.iDivUp(vol.size[1], CUDAUtil.gridBlockSize[1]) * CUDAUtil.gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* 2 * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
JCuda.cudaMemset(deviceX, 0, memorySize);
CUDAVolume3D cudaVol = (CUDAVolume3D) vol;
// Calculate new grid size
gridSize = getGrid(vol.size);
Pointer sizePointer = CUDAUtil.copyToDeviceMemory(vol.size);
CUfunction function = new CUfunction();
JCudaDriver.cuModuleGetFunction(function, module,
"_Z11makeComplexPfS_Pi");
ArrayList<Object> arguments = new ArrayList<Object>();
arguments.add(cudaVol.getDevicePointer());
arguments.add(deviceX);
arguments.add(sizePointer);
callCUDAFunction(function, arguments);
JCuda.cudaFree(sizePointer);
JCuda.cudaFree(cudaVol.getDevicePointer());
cudaVol.setDevicePointer(deviceX);
float [][][] temp = new float [vol.size[0]][vol.size[1]][vol.size[2]*2];
vol.data = null;
vol.data = temp;
temp = null;
vol.in_dim = 2;
return;
}
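The string passed to cuModuleGetFunction ("_Z11makeComplexPfS_Pi") is the C++-mangled name of the kernel makeComplex(float*, float*, int*) in the loaded module; the same lookup pattern recurs in the real, imag, and abs examples below.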
Example 11: real
import jcuda.runtime.JCuda; // import the package/class the method depends on
@Override
public int real(Volume3D vol)
{
if (debug) {
System.out.print("Called real ");
printTime();
}
if (DEBUG_FLAG)
fprintf("vol_real\n");
if (vol.in_dim == 1) return(0);
if (vol.in_dim != 2) {
fprintf( "vol_real: Invalid dimension\n");
return(-1);
}
int adaptedWidth = CUDAUtil.iDivUp(vol.size[2], CUDAUtil.gridBlockSize[0]) * CUDAUtil.gridBlockSize[0];
int adaptedHeight = CUDAUtil.iDivUp(vol.size[1], CUDAUtil.gridBlockSize[1]) * CUDAUtil.gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* 1 * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
CUDAVolume3D cudaVol = (CUDAVolume3D) vol;
// Calculate new grid size
gridSize = getGrid(vol.size);
Pointer sizePointer = CUDAUtil.copyToDeviceMemory(vol.size);
CUfunction function = new CUfunction();
JCudaDriver.cuModuleGetFunction(function, module,
"_Z4realPfS_Pi");
ArrayList<Object> arguments = new ArrayList<Object>();
arguments.add(cudaVol.getDevicePointer());
arguments.add(deviceX);
arguments.add(sizePointer);
if (debug) {
System.out.print("Called init done ");
printTime();
}
callCUDAFunction(function, arguments);
if (debug) {
System.out.print("CUDA done ");
printTime();
}
JCuda.cudaFree(sizePointer);
JCuda.cudaFree(cudaVol.getDevicePointer());
cudaVol.setDevicePointer(deviceX);
float [][][] temp = new float[vol.size[0]][vol.size[1]][vol.size[2]];
vol.data = null;
vol.data = temp;
temp = null;
vol.in_dim = 1;
if (debug) {
System.out.print("Clean up done ");
printTime();
}
return(0);
}
Example 12: imag
import jcuda.runtime.JCuda; // import the package/class the method depends on
@Override
public int imag(Volume3D vol)
{
if (debug) {
System.out.print("Called real ");
printTime();
}
if (DEBUG_FLAG)
fprintf("vol_real\n");
if (vol.in_dim == 1) return(0);
if (vol.in_dim != 2) {
fprintf( "vol_real: Invalid dimension\n");
return(-1);
}
int adaptedWidth = CUDAUtil.iDivUp(vol.size[2], CUDAUtil.gridBlockSize[0]) * CUDAUtil.gridBlockSize[0];
int adaptedHeight = CUDAUtil.iDivUp(vol.size[1], CUDAUtil.gridBlockSize[1]) * CUDAUtil.gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* 1 * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
CUDAVolume3D cudaVol = (CUDAVolume3D) vol;
// Calculate new grid size
gridSize = getGrid(vol.size);
Pointer sizePointer = CUDAUtil.copyToDeviceMemory(vol.size);
CUfunction function = new CUfunction();
JCudaDriver.cuModuleGetFunction(function, module,
"_Z4imagPfS_Pi");
ArrayList<Object> arguments = new ArrayList<Object>();
arguments.add(cudaVol.getDevicePointer());
arguments.add(deviceX);
arguments.add(sizePointer);
if (debug) {
System.out.print("Called init done ");
printTime();
}
callCUDAFunction(function, arguments);
if (debug) {
System.out.print("CUDA done ");
printTime();
}
JCuda.cudaFree(sizePointer);
JCuda.cudaFree(cudaVol.getDevicePointer());
cudaVol.setDevicePointer(deviceX);
float [][][] temp = new float[vol.size[0]][vol.size[1]][vol.size[2]];
vol.data = null;
vol.data = temp;
temp = null;
vol.in_dim = 1;
if (debug) {
System.out.print("Clean up done ");
printTime();
}
return(0);
}
Example 13: abs
import jcuda.runtime.JCuda; // import the package/class the method depends on
@Override
public int abs(Volume3D vol)
{
if (DEBUG_FLAG)
fprintf("vol_abs\n");
if (vol.in_dim != 1 && vol.in_dim != 2) {
fprintf( "vol_abs: Invalid dimension\n");
return(-1);
}
int adaptedWidth = CUDAUtil.iDivUp(vol.size[2], CUDAUtil.gridBlockSize[0]) * CUDAUtil.gridBlockSize[0];
int adaptedHeight = CUDAUtil.iDivUp(vol.size[1], CUDAUtil.gridBlockSize[1]) * CUDAUtil.gridBlockSize[1];
int memorySize = adaptedWidth*adaptedHeight*vol.size[0]* 1 * Sizeof.FLOAT;
CUdeviceptr deviceX = new CUdeviceptr();
JCuda.cudaMalloc(deviceX, memorySize);
initCUDA();
CUDAVolume3D cudaVol = (CUDAVolume3D) vol;
// Calculate new grid size
gridSize = getGrid(vol.size);
Pointer sizePointer = CUDAUtil.copyToDeviceMemory(vol.size);
CUfunction function = new CUfunction();
JCudaDriver.cuModuleGetFunction(function, module,
"_Z3absPfS_Pii");
ArrayList<Object> arguments = new ArrayList<Object>();
arguments.add(cudaVol.getDevicePointer());
arguments.add(deviceX);
arguments.add(sizePointer);
arguments.add(cudaVol.in_dim);
if (debug) {
System.out.print("Called init done ");
printTime();
}
callCUDAFunction(function, arguments);
if (debug) {
System.out.print("CUDA done ");
printTime();
}
JCuda.cudaFree(sizePointer);
JCuda.cudaFree(cudaVol.getDevicePointer());
cudaVol.setDevicePointer(deviceX);
float [][][] temp = new float[vol.size[0]][vol.size[1]][vol.size[2]];
vol.data = null;
vol.data = temp;
temp = null;
vol.in_dim = 1;
if (debug) {
System.out.print("Clean up done ");
printTime();
}
return(0);
}