本文整理汇总了Java中jcuda.runtime.JCuda.cudaMemcpy方法的典型用法代码示例。如果您正苦于以下问题：Java JCuda.cudaMemcpy方法的具体用法？Java JCuda.cudaMemcpy怎么用？Java JCuda.cudaMemcpy使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类jcuda.runtime.JCuda的用法示例。
在下文中一共展示了JCuda.cudaMemcpy方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getrfGetriBatched
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Runs {@code cublasSgetrfBatched} followed by {@code cublasSgetriBatched} over a batch of
 * matrices: the matrices in {@code A} are factorized and the result of the inversion step is
 * written to the matrices in {@code B}.
 *
 * <p>The dimension and leading dimension for the whole batch are taken from
 * {@code A.get(0).rows} (and {@code B.get(0).rows} for the output), so every matrix is assumed
 * to be square and of that same size -- NOTE(review): not checked here, confirm against callers.
 * {@code data_d} is presumably each matrix's device buffer.
 *
 * <p>The temporary device buffers (pointer tables, pivots, info) are released in a
 * {@code finally} block so they do not leak when a JCuda/JCublas call throws. An empty batch is
 * a no-op.
 *
 * @param A input matrices; their contents are overwritten by the factorization step
 * @param B output matrices; must contain at least {@code A.size()} entries
 */
private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
    final int batchSize = A.size();
    if (batchSize == 0) {
        // Nothing to invert; also avoids A.get(0) failing after buffers were allocated.
        return;
    }
    final int n = A.get(0).rows;
    final int ldb = B.get(0).rows;

    // Host-side tables of the per-matrix pointers.
    Pointer[] Apointers = new Pointer[batchSize];
    Pointer[] Bpointers = new Pointer[B.size()];
    for (int i = 0; i < batchSize; ++i) {
        Apointers[i] = A.get(i).data_d;
        Bpointers[i] = B.get(i).data_d;
    }

    // Byte counts are computed in long: cudaMalloc/cudaMemcpy take long sizes, and
    // n * batchSize * Sizeof.INT can exceed Integer.MAX_VALUE for large batches.
    final long aTableBytes = (long) batchSize * Sizeof.POINTER;
    final long bTableBytes = (long) B.size() * Sizeof.POINTER;
    final long infoBytes = (long) batchSize * Sizeof.INT;
    final long pivotBytes = (long) n * batchSize * Sizeof.INT;

    // Created up front so the finally block can free them unconditionally; freeing a
    // still-null pointer is a no-op in the CUDA runtime.
    Pointer Apointers_d = new Pointer();
    Pointer Bpointers_d = new Pointer();
    Pointer info_d = new Pointer();
    Pointer pivots_d = new Pointer();
    try {
        JCuda.cudaMalloc(Apointers_d, aTableBytes);
        JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), aTableBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        JCuda.cudaMalloc(Bpointers_d, bTableBytes);
        JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), bTableBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        JCuda.cudaMalloc(info_d, infoBytes);
        JCuda.cudaMalloc(pivots_d, pivotBytes);
        if (DEBUG_SYNC) {
            JCudaDriver.cuCtxSynchronize();
        }

        JCublas2.cublasSgetrfBatched(cublasHandle, n, Apointers_d, n, pivots_d, info_d, batchSize);
        if (DEBUG_SYNC) {
            JCudaDriver.cuCtxSynchronize();
        }

        JCublas2.cublasSgetriBatched(cublasHandle, n, Apointers_d, n, pivots_d, Bpointers_d, ldb, info_d, batchSize);
        if (DEBUG_SYNC) {
            JCudaDriver.cuCtxSynchronize();
        }
    } finally {
        JCuda.cudaFree(Apointers_d);
        JCuda.cudaFree(Bpointers_d);
        JCuda.cudaFree(info_d);
        JCuda.cudaFree(pivots_d);
    }
    if (DEBUG_SYNC) {
        JCudaDriver.cuCtxSynchronize();
    }
}
示例2: gemmBatched
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Runs {@code cublasSgemmBatched} (no transposition on either operand) over a batch, i.e.
 * {@code C[i] = alpha * A[i] * B[i] + beta * C[i]} for each index in the batch.
 *
 * <p>The problem size for the whole batch is taken from the first entries: {@code m} and
 * {@code n} from {@code C.get(0)}, {@code k} from {@code B.get(0).rows}, and each leading
 * dimension from the corresponding first matrix's {@code rows}. All matrices in a list are
 * therefore assumed to share those dimensions -- NOTE(review): not checked here, confirm
 * against callers. {@code data_d} is presumably each matrix's device buffer.
 *
 * <p>The temporary device pointer tables are released in a {@code finally} block so they do
 * not leak when a JCuda/JCublas call throws. An empty batch is a no-op.
 *
 * @param alpha scalar applied to the product
 * @param A left operands; defines the batch size
 * @param B right operands; must contain at least {@code A.size()} entries
 * @param beta scalar applied to the existing contents of {@code C}
 * @param C accumulators/outputs; must contain at least {@code A.size()} entries
 */
private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
    final int batchSize = A.size();
    if (batchSize == 0) {
        // Nothing to multiply; also avoids C.get(0) failing after buffers were allocated.
        return;
    }

    // Host-side tables of the per-matrix pointers.
    Pointer[] Apointers = new Pointer[batchSize];
    Pointer[] Bpointers = new Pointer[B.size()];
    Pointer[] Cpointers = new Pointer[C.size()];
    for (int i = 0; i < batchSize; ++i) {
        Apointers[i] = A.get(i).data_d;
        Bpointers[i] = B.get(i).data_d;
        Cpointers[i] = C.get(i).data_d;
    }

    // Byte counts in long, matching the long size parameters of cudaMalloc/cudaMemcpy.
    final long aTableBytes = (long) batchSize * Sizeof.POINTER;
    final long bTableBytes = (long) B.size() * Sizeof.POINTER;
    final long cTableBytes = (long) C.size() * Sizeof.POINTER;

    // Created up front so the finally block can free them unconditionally; freeing a
    // still-null pointer is a no-op in the CUDA runtime.
    Pointer Apointers_d = new Pointer();
    Pointer Bpointers_d = new Pointer();
    Pointer Cpointers_d = new Pointer();
    try {
        JCuda.cudaMalloc(Apointers_d, aTableBytes);
        JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), aTableBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        JCuda.cudaMalloc(Bpointers_d, bTableBytes);
        JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), bTableBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        JCuda.cudaMalloc(Cpointers_d, cTableBytes);
        JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), cTableBytes, cudaMemcpyKind.cudaMemcpyHostToDevice);
        if (DEBUG_SYNC) {
            JCudaDriver.cuCtxSynchronize();
        }

        JCublas2.cublasSgemmBatched(
                cublasHandle,
                cublasOperation.CUBLAS_OP_N,
                cublasOperation.CUBLAS_OP_N,
                C.get(0).rows,
                C.get(0).cols,
                B.get(0).rows,
                Pointer.to(new float[] {alpha}),
                Apointers_d,
                A.get(0).rows,
                Bpointers_d,
                B.get(0).rows,
                Pointer.to(new float[] {beta}),
                Cpointers_d,
                C.get(0).rows,
                batchSize);
        if (DEBUG_SYNC) {
            JCudaDriver.cuCtxSynchronize();
        }
    } finally {
        JCuda.cudaFree(Apointers_d);
        JCuda.cudaFree(Bpointers_d);
        JCuda.cudaFree(Cpointers_d);
    }
    if (DEBUG_SYNC) {
        JCudaDriver.cuCtxSynchronize();
    }
}
示例3: copyToDeviceMemory
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Allocates device memory for an int array, copies the array into it and returns the
 * device pointer.
 *
 * <p>The allocation is not released here; the caller is responsible for freeing the
 * returned pointer.
 *
 * @param data the int array to upload
 * @return the pointer to the newly allocated device memory holding a copy of {@code data}
 */
public static CUdeviceptr copyToDeviceMemory(int [] data){
    // Computed in long: data.length * Sizeof.INT overflows int for arrays above ~512M
    // elements, and cudaMalloc/cudaMemcpy accept long byte counts.
    long memorySize = (long) data.length * Sizeof.INT;
    CUdeviceptr deviceX = new CUdeviceptr();
    JCuda.cudaMalloc(deviceX, memorySize);
    JCuda.cudaMemcpy(deviceX, Pointer.to(data), memorySize,
            cudaMemcpyKind.cudaMemcpyHostToDevice);
    return deviceX;
}
示例4: fetchFromDeviceMemory
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Copies {@code data.length} floats from device memory into {@code data} and then frees
 * the device memory.
 *
 * <p>After this call {@code deviceX} has been passed to {@code cudaFree} and must not be
 * used again.
 *
 * @param data the float array to write to; its length determines how many bytes are copied
 * @param deviceX the pointer to the device memory to read from and release
 */
public static void fetchFromDeviceMemory(float [] data, CUdeviceptr deviceX){
    // Computed in long: data.length * Sizeof.FLOAT overflows int for arrays above ~512M
    // elements, and cudaMemcpy accepts a long byte count.
    long memorySize = (long) data.length * Sizeof.FLOAT;
    JCuda.cudaMemcpy(Pointer.to(data), deviceX, memorySize,
            cudaMemcpyKind.cudaMemcpyDeviceToHost);
    JCuda.cudaFree(deviceX);
}
示例5: moveToDevice
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Copies the volume's host data to the device, one {@code vol.data[i][j]} line at a time.
 *
 * <p>Line {@code (i, j)} is written at byte offset
 * {@code (vol.size[1] * i + j) * lineBytes} from {@code deviceX}, where {@code lineBytes}
 * is {@code vol.size[2] * vol.getInternalDimension() * Sizeof.FLOAT}. No device memory is
 * allocated here; {@code deviceX} must already point to a buffer large enough for all lines.
 *
 * @param vol the volume whose data is uploaded
 * @param deviceX the base pointer of the destination device memory
 */
public static void moveToDevice(Volume3D vol, CUdeviceptr deviceX){
    // Size in bytes of a single vol.data[i][j] line.
    final int lineBytes = vol.size[2] * vol.getInternalDimension() * Sizeof.FLOAT;
    for (int i = 0; i < vol.size[0]; i++) {
        for (int j = 0; j < vol.size[1]; j++) {
            // Position of line (i, j) in the flattened device layout.
            // NOTE(review): index and offset use int arithmetic and could wrap for
            // volumes larger than 2 GiB -- confirm AdjustablePointer's offset type.
            final int lineIndex = (vol.size[1] * i) + j;
            AdjustablePointer target = new AdjustablePointer(deviceX, lineIndex * lineBytes);
            JCuda.cudaMemcpy(
                    target,
                    Pointer.to(vol.data[i][j]),
                    lineBytes,
                    cudaMemcpyKind.cudaMemcpyHostToDevice);
        }
    }
}
示例6: fetchFromDevice
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Copies the volume's data back from the device into {@code vol.data}, one
 * {@code vol.data[i][j]} line at a time.
 *
 * <p>Line {@code (i, j)} is read from byte offset
 * {@code (vol.size[1] * i + j) * lineBytes} from {@code deviceX}, where {@code lineBytes}
 * is {@code vol.size[2] * vol.getInternalDimension() * Sizeof.FLOAT}. The device memory is
 * neither allocated nor freed here.
 *
 * @param vol the volume object that receives the data
 * @param deviceX the base pointer of the source memory on the device
 */
public static void fetchFromDevice(Volume3D vol, CUdeviceptr deviceX){
    // Size in bytes of a single vol.data[i][j] line.
    final int lineBytes = vol.size[2] * vol.getInternalDimension() * Sizeof.FLOAT;
    for (int i = 0; i < vol.size[0]; i++) {
        for (int j = 0; j < vol.size[1]; j++) {
            // Position of line (i, j) in the flattened device layout.
            // NOTE(review): index and offset use int arithmetic and could wrap for
            // volumes larger than 2 GiB -- confirm AdjustablePointer's offset type.
            final int lineIndex = (vol.size[1] * i) + j;
            AdjustablePointer source = new AdjustablePointer(deviceX, (lineIndex * lineBytes));
            JCuda.cudaMemcpy(
                    Pointer.to(vol.data[i][j]),
                    source,
                    lineBytes,
                    cudaMemcpyKind.cudaMemcpyDeviceToHost);
        }
    }
}
示例7: cudaMemcpy
import jcuda.runtime.JCuda; //导入方法依赖的package包/类
/**
 * Timed and logged wrapper around {@code JCuda.cudaMemcpy}.
 *
 * <p>Performs the copy, reports the elapsed wall-clock time in seconds to
 * {@code cudaMemcpy_execution}, logs the call and its result code through
 * {@code CuDNN.log}, and finally passes the result code to {@code handle} (presumably
 * the error check -- confirm against its definition). Nothing is returned.
 *
 * @param dst the destination pointer
 * @param src the source pointer
 * @param count the number of bytes to copy
 * @param cudaMemcpyKind_kind the copy direction, one of the {@code cudaMemcpyKind} constants
 */
public static void cudaMemcpy(final Pointer dst, final Pointer src, final long count, final int cudaMemcpyKind_kind) {
    final long begin = System.nanoTime();
    final int status = JCuda.cudaMemcpy(dst, src, count, cudaMemcpyKind_kind);
    // Nanoseconds to seconds, measured immediately after the copy returns.
    final double elapsedSeconds = (System.nanoTime() - begin) / 1e9;
    cudaMemcpy_execution.accept(elapsedSeconds);
    CuDNN.log("cudaMemcpy", status, dst, src, count, cudaMemcpyKind_kind);
    handle(status);
}