本文整理汇总了C#中CudaDeviceVariable.CopyToHost方法的典型用法代码示例。如果您正苦于以下问题：C# CudaDeviceVariable.CopyToHost方法的具体用法？C# CudaDeviceVariable.CopyToHost怎么用？C# CudaDeviceVariable.CopyToHost使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类CudaDeviceVariable的用法示例。
在下文中一共展示了CudaDeviceVariable.CopyToHost方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: Backward
/// <summary>
/// Runs <c>cudnnSoftmaxBackward</c> on host-side <c>float</c> arrays.
/// Device buffers matching the length of each host array are allocated, the two
/// input arrays are uploaded, the native call is issued through <c>Invoke</c>, and the
/// resulting gradient is downloaded into <paramref name="destDiffData"/>.
/// </summary>
/// <param name="algorithm">Softmax algorithm forwarded to the native call.</param>
/// <param name="mode">Softmax mode forwarded to the native call.</param>
/// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
/// <param name="srcData">Host input uploaded to the device before the call.</param>
/// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
/// <param name="srcDiffData">Host input uploaded to the device before the call.</param>
/// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
/// <param name="destDiffData">Host array overwritten with the device result.</param>
public void Backward(CudnnSoftmaxAlgorithm algorithm, CudnnSoftmaxMode mode,
    CudnnTensorDescriptor srcTensor, float[] srcData, CudnnTensorDescriptor srcDiffTensor, float[] srcDiffData,
    CudnnTensorDescriptor destDiffTensor, float[] destDiffData)
{
    Contract.Requires(srcTensor != null);
    Contract.Requires(srcData != null);
    Contract.Requires(srcDiffTensor != null);
    Contract.Requires(srcDiffData != null);
    Contract.Requires(destDiffTensor != null);
    Contract.Requires(destDiffData != null);

    ThrowIfNotInitialized();
    CheckIfCompatible(CudnnType.Float, srcTensor, srcDiffTensor, destDiffTensor);

    using (CudaDeviceVariable<float> deviceSrc = new CudaDeviceVariable<float>(srcData.Length))
    {
        using (CudaDeviceVariable<float> deviceSrcDiff = new CudaDeviceVariable<float>(srcDiffData.Length))
        {
            using (CudaDeviceVariable<float> deviceDestDiff = new CudaDeviceVariable<float>(destDiffData.Length))
            {
                // Host -> device for both inputs.
                deviceSrc.CopyToDevice(srcData);
                deviceSrcDiff.CopyToDevice(srcDiffData);

                Invoke(() => CudnnNativeMethods.cudnnSoftmaxBackward(
                    handle,
                    algorithm,
                    mode,
                    srcTensor.Handle, deviceSrc.DevicePointer,
                    srcDiffTensor.Handle, deviceSrcDiff.DevicePointer,
                    destDiffTensor.Handle, deviceDestDiff.DevicePointer));

                // Device -> host for the output.
                deviceDestDiff.CopyToHost(destDiffData);
            }
        }
    }
}
示例2: Test
/// <summary>
/// Loads the kernel named "kernel" from the given PTX image, runs it over a
/// 16-element <c>int</c> buffer initialised to <c>i - 2</c>, and prints the buffer
/// contents read back from the device.
/// </summary>
/// <param name="ptxFile">PTX image containing an entry point called "kernel".</param>
static void Test(byte[] ptxFile)
{
    const int size = 16;

    // The context and the device-variable wrapper are disposable; scoping them with
    // `using` releases them even when loading or running the kernel throws.
    using (var context = new CudaContext())
    {
        var kernel = context.LoadKernelPTX(ptxFile, "kernel");
        var memory = context.AllocateMemory(sizeof(int) * size);

        using (var gpuMemory = new CudaDeviceVariable<int>(memory))
        {
            var cpuMemory = new int[size];
            for (var i = 0; i < size; i++)
            {
                cpuMemory[i] = i - 2;
            }

            gpuMemory.CopyToDevice(cpuMemory);

            // 4 blocks x 4 threads = 16 threads, one per element.
            kernel.BlockDimensions = 4;
            kernel.GridDimensions = 4;
            kernel.Run(memory);

            gpuMemory.CopyToHost(cpuMemory);
            for (var i = 0; i < size; i++)
            {
                Console.WriteLine("{0} = {1}", i, cpuMemory[i]);
            }
        }
    }
}
示例3: BackwardBias
/// <summary>
/// Runs <c>cudnnConvolutionBackwardBias</c> on host-side <c>double</c> arrays.
/// Both arrays are uploaded to temporary device buffers, the native call is issued
/// through <c>Invoke</c>, and the destination buffer is downloaded into
/// <paramref name="destData"/>.
/// </summary>
/// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
/// <param name="srcData">Host input uploaded to the device before the call.</param>
/// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
/// <param name="destData">
/// Host array that receives the result. Its current contents are uploaded first so
/// that an accumulating call adds to these values.
/// </param>
/// <param name="accumulate">Accumulation mode forwarded to the native call.</param>
public void BackwardBias(CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor destTensor, double[] destData, CudnnAccumulateResult accumulate)
{
    Contract.Requires(srcTensor != null);
    Contract.Requires(srcData != null);
    Contract.Requires(destTensor != null);
    Contract.Requires(destData != null);

    ThrowIfNotInitialized();
    CheckIfCompatible(CudnnType.Double, srcTensor, destTensor);

    using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
    using (var destDataGpu = new CudaDeviceVariable<double>(destData.Length))
    {
        srcDataGpu.CopyToDevice(srcData);

        // A freshly allocated device buffer is uninitialised. When the caller asks
        // cuDNN to accumulate, the destination is read as well as written, so seed it
        // with the caller's values. In overwrite mode the upload has no effect on the result.
        destDataGpu.CopyToDevice(destData);

        Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardBias(handle, srcTensor.Handle, srcDataGpu.DevicePointer, destTensor.Handle, destDataGpu.DevicePointer, accumulate));

        destDataGpu.CopyToHost(destData);
    }
}
示例4: Run
/// <summary>
/// Evaluates the measure selected by <paramref name="operation"/> for the device vectors
/// <paramref name="A"/> and <paramref name="B"/> and leaves the value in <paramref name="result"/>.
/// Returns without doing anything when <c>ValidateAtRun(operation)</c> is false.
/// </summary>
/// <param name="operation">Which distance/similarity branch to execute.</param>
/// <param name="A">First input vector (device memory).</param>
/// <param name="sizeA">Element count passed to the kernels and used for <c>SetupExecution</c>.</param>
/// <param name="B">Second input vector (device memory).</param>
/// <param name="sizeB">Only forwarded to <c>RunReturn</c> in the EuclidDist branch.</param>
/// <param name="result">Device variable that receives the computed value.</param>
/// <param name="sizeRes">Not referenced anywhere in this method body.</param>
public void Run(DistanceOperation operation,
CudaDeviceVariable<float> A, int sizeA,
CudaDeviceVariable<float> B, int sizeB,
CudaDeviceVariable<float> result, int sizeRes)
{
if (!ValidateAtRun(operation))
return;
switch (operation)
{
case DistanceOperation.DotProd:
//ZXC m_dotKernel.Run(result.DevicePointer, 0, A.DevicePointer, B.DevicePointer, sizeA, 0);
m_dotKernel.Run(result.DevicePointer, A.DevicePointer, B.DevicePointer, sizeA);
break;
case DistanceOperation.CosDist:
//ZXC m_cosKernel.Run(result.DevicePointer, 0, A.DevicePointer, B.DevicePointer, sizeA, 0);
m_cosKernel.Run(result.DevicePointer, A.DevicePointer, B.DevicePointer, sizeA);
break;
case DistanceOperation.EuclidDist:
// The value is obtained on the host through RunReturn and then written into result.
float res = RunReturn(operation, A, sizeA, B, sizeB);
result.CopyToDevice(res);
break;
case DistanceOperation.EuclidDistSquared:
// Element-wise A - B goes into m_temp, then m_temp is dotted with itself.
// NOTE(review): m_temp is a field declared outside this view; presumably a scratch
// buffer of at least sizeA elements - confirm.
m_combineVecsKernel.SetupExecution(sizeA);
m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Subtraction, sizeA);
//ZXC m_dotKernel.Run(result.DevicePointer, 0, m_temp, m_temp, m_temp.Count, 0);
m_dotKernel.Run(result.DevicePointer, m_temp, m_temp);
break;
case DistanceOperation.HammingDist:
// Element-wise Equal join into m_temp, summed into result, then converted on the host.
m_combineVecsKernel.SetupExecution(sizeA);
m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Equal, sizeA);
//ZXC m_reduceSumKernel.Run(result.DevicePointer, m_temp, m_temp.Count, 0, 0, 1, /*distributed = false*/0); // reduction to a single number
m_reduceSumKernel.Run(result.DevicePointer, m_temp);
float fDist = 0; // to transform number of matches to a number of differences
result.CopyToHost(ref fDist);
// Normalisation uses m_temp.Count, not sizeA.
fDist = m_temp.Count - fDist;
result.CopyToDevice(fDist);
break;
case DistanceOperation.HammingSim:
// Same Equal join and sum as HammingDist; the sum is then divided by m_temp.Count.
m_combineVecsKernel.SetupExecution(sizeA);
m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Equal, sizeA);
//ZXC m_reduceSumKernel.Run(result.DevicePointer, m_temp, m_temp.Count, 0, 0, 1, /*distributed = false*/0); // reduction to a single number
m_reduceSumKernel.Run(result.DevicePointer, m_temp);
// take the single reduced number and convert it to Hamming Similarity:
// a number in range <0,1> that says how much the vectors are similar
// NOTE(review): because the join is Equal (as in HammingDist), the reduced value is
// presumably the number of matching elements, not differing ones - confirm.
float fSim = 0;
result.CopyToHost(ref fSim);
fSim = fSim / m_temp.Count;
result.CopyToDevice(fSim);
break;
}
}
示例5: Backward
/// <summary>
/// Runs <c>cudnnPoolingBackward</c> on host-side <c>double</c> arrays.
/// Four device buffers are allocated to match the host array lengths; the source,
/// source-diff and destination arrays are uploaded, the native call is issued through
/// <c>Invoke</c>, and the destination-diff buffer is downloaded into
/// <paramref name="destDiffData"/>.
/// </summary>
/// <param name="pooling">Pooling descriptor whose handle is passed to the native call.</param>
/// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
/// <param name="srcData">Host input uploaded before the call.</param>
/// <param name="srcDiffTensor">Descriptor for <paramref name="srcDiffData"/>.</param>
/// <param name="srcDiffData">Host input uploaded before the call.</param>
/// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
/// <param name="destData">Host input uploaded before the call.</param>
/// <param name="destDiffTensor">Descriptor for <paramref name="destDiffData"/>.</param>
/// <param name="destDiffData">Host array overwritten with the device result.</param>
public void Backward(CudnnPoolingDescriptor pooling, CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor srcDiffTensor, double[] srcDiffData,
    CudnnTensorDescriptor destTensor, double[] destData, CudnnTensorDescriptor destDiffTensor, double[] destDiffData)
{
    Contract.Requires(pooling != null);
    Contract.Requires(srcTensor != null);
    Contract.Requires(srcData != null);
    Contract.Requires(destTensor != null);
    Contract.Requires(destData != null);
    Contract.Requires(srcDiffTensor != null);
    Contract.Requires(srcDiffData != null);
    Contract.Requires(destDiffTensor != null);
    Contract.Requires(destDiffData != null);

    ThrowIfNotInitialized();
    CheckIfCompatible(CudnnType.Double, srcTensor, srcDiffTensor, destTensor, destDiffTensor);

    using (CudaDeviceVariable<double> deviceSrc = new CudaDeviceVariable<double>(srcData.Length))
    {
        using (CudaDeviceVariable<double> deviceSrcDiff = new CudaDeviceVariable<double>(srcDiffData.Length))
        {
            using (CudaDeviceVariable<double> deviceDest = new CudaDeviceVariable<double>(destData.Length))
            {
                using (CudaDeviceVariable<double> deviceDestDiff = new CudaDeviceVariable<double>(destDiffData.Length))
                {
                    // Host -> device for the three inputs.
                    deviceSrc.CopyToDevice(srcData);
                    deviceSrcDiff.CopyToDevice(srcDiffData);
                    deviceDest.CopyToDevice(destData);

                    Invoke(() => CudnnNativeMethods.cudnnPoolingBackward(
                        handle,
                        pooling.Handle,
                        srcTensor.Handle, deviceSrc.DevicePointer,
                        srcDiffTensor.Handle, deviceSrcDiff.DevicePointer,
                        destTensor.Handle, deviceDest.DevicePointer,
                        destDiffTensor.Handle, deviceDestDiff.DevicePointer));

                    // Device -> host for the output.
                    deviceDestDiff.CopyToHost(destDiffData);
                }
            }
        }
    }
}
示例6: SaveJpeg
//.........这里部分代码省略.........
nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
}
for (int i = 0; i < oFrameHeader.nComponents; ++i)
{
NppiSize oBlocks = new NppiSize();
NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);
oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 *
(float)(oBlocksPerMCU.width) / nMCUBlocksH);
oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 *
(float)(oBlocksPerMCU.height) / nMCUBlocksV);
oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
// Allocate Memory
apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
}
/***************************
*
* Output
*
***************************/
// Forward DCT
for (int i = 0; i < 3; ++i)
{
compression.DCTQuantFwd8x8LS(apDstImage[i], apdDCT[i], aDstSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
}
// Huffman Encoding
CudaDeviceVariable<byte> pdScan = new CudaDeviceVariable<byte>(BUFFER_SIZE);
int nScanLength = 0;
int nTempSize = JPEGCompression.EncodeHuffmanGetSize(aDstSize[0], 3);
CudaDeviceVariable<byte> pJpegEncoderTemp = new CudaDeviceVariable<byte>(nTempSize);
NppiEncodeHuffmanSpec[] apHuffmanDCTableEnc = new NppiEncodeHuffmanSpec[3];
NppiEncodeHuffmanSpec[] apHuffmanACTableEnc = new NppiEncodeHuffmanSpec[3];
for (int i = 0; i < 3; ++i)
{
apHuffmanDCTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
apHuffmanACTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
}
JPEGCompression.EncodeHuffmanScan(apdDCT, 0, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, pdScan, ref nScanLength, apHuffmanDCTableEnc, apHuffmanACTableEnc, aDstSize, pJpegEncoderTemp);
for (int i = 0; i < 3; ++i)
{
JPEGCompression.EncodeHuffmanSpecFree(apHuffmanDCTableEnc[i]);
JPEGCompression.EncodeHuffmanSpecFree(apHuffmanACTableEnc[i]);
}
// Write JPEG to byte array, as in original sample code
byte[] pDstOutput = new byte[BUFFER_SIZE];
int pos = 0;
oFrameHeader.nWidth = (ushort)oDstImageSize.width;
oFrameHeader.nHeight = (ushort)oDstImageSize.height;
writeMarker(0x0D8, pDstOutput, ref pos);
writeJFIFTag(pDstOutput, ref pos);
writeQuantizationTable(aQuantizationTables[0], pDstOutput, ref pos);
writeQuantizationTable(aQuantizationTables[1], pDstOutput, ref pos);
writeFrameHeader(oFrameHeader, pDstOutput, ref pos);
writeHuffmanTable(aHuffmanTables[0], pDstOutput, ref pos);
writeHuffmanTable(aHuffmanTables[1], pDstOutput, ref pos);
writeHuffmanTable(aHuffmanTables[2], pDstOutput, ref pos);
writeHuffmanTable(aHuffmanTables[3], pDstOutput, ref pos);
writeScanHeader(oScanHeader, pDstOutput, ref pos);
pdScan.CopyToHost(pDstOutput, 0, pos, nScanLength);
pos += nScanLength;
writeMarker(0x0D9, pDstOutput, ref pos);
FileStream fs = new FileStream(aFilename, FileMode.Create, FileAccess.Write);
fs.Write(pDstOutput, 0, pos);
fs.Close();
//cleanup:
fs.Dispose();
pJpegEncoderTemp.Dispose();
pdScan.Dispose();
apdDCT[2].Dispose();
apdDCT[1].Dispose();
apdDCT[0].Dispose();
pdQuantizationTables[1].Dispose();
pdQuantizationTables[0].Dispose();
srcCr.Dispose();
srcCb.Dispose();
srcY.Dispose();
src.Dispose();
compression.Dispose();
}
示例7: Generate
/// <summary>
/// Fills a width x height x depth voxel grid on the GPU, counts triangles per cell,
/// prefix-sums those counts and, if any vertices result, writes them into a newly
/// created Direct3D 11 vertex buffer stored in <c>container.Geometry</c>.
/// All temporary CUDA resources created here are disposed before returning.
/// </summary>
/// <param name="kernelPositionWeight">Kernel launched first over the full voxel grid; also the kernel the noise texture is attached to.</param>
/// <param name="width">Voxel grid size along X.</param>
/// <param name="height">Voxel grid size along Y.</param>
/// <param name="depth">Voxel grid size along Z.</param>
private void Generate(CudaKernel kernelPositionWeight, int width, int height, int depth)
{
// Number of voxels, and number of cells when each dimension is reduced by one.
int count = width * height * depth;
int widthD = width - 1;
int heightD = height - 1;
int depthD = depth - 1;
int countDecremented = widthD * heightD * depthD;
// 8x8x8 threads per block; grids are rounded up so they cover the full / decremented volume.
dim3 blockDimensions = new dim3(8, 8, 8);
dim3 gridDimensions = new dim3((int)Math.Ceiling(width / 8.0), (int)Math.Ceiling(height / 8.0), (int)Math.Ceiling(depth / 8.0));
dim3 gridDimensionsDecremented = new dim3((int)Math.Ceiling(widthD / 8.0), (int)Math.Ceiling(heightD / 8.0), (int)Math.Ceiling(depthD / 8.0));
// 16x16x16 noise array exposed to kernelPositionWeight as the texture "noiseTexture"
// (wrap addressing, linear filtering, normalized coordinates).
CUDANoiseCube noiseCube = new CUDANoiseCube();
CudaArray3D noiseArray = noiseCube.GenerateUniformArray(16, 16, 16);
CudaTextureArray3D noiseTexture = new CudaTextureArray3D(kernelPositionWeight, "noiseTexture", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates, noiseArray);
CudaDeviceVariable<Voxel> voxelsDev = new CudaDeviceVariable<Voxel>(count);
kernelPositionWeight.BlockDimensions = blockDimensions;
// The grid size is written straight into CudaKernel's private "_gridDim" field via reflection.
// NOTE(review): presumably this works around the public GridDimensions setter - confirm why.
typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelPositionWeight, gridDimensions);
kernelPositionWeight.Run(voxelsDev.DevicePointer, width, height, depth);
// Second pass over the same voxel buffer, parameterised by the container's ambient settings.
kernelNormalAmbient.BlockDimensions = blockDimensions;
typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelNormalAmbient, gridDimensions);
kernelNormalAmbient.Run(voxelsDev.DevicePointer, width, height, depth, container.Settings.AmbientRayWidth, container.Settings.AmbientSamplesCount);
// The triangle-count buffer is sized from NearestPowerOfTwo of each decremented dimension.
// NOTE(review): NearestPowerOfTwo is not shown; presumably it rounds up so the scan gets a
// power-of-two length - confirm.
int nearestW = NearestPowerOfTwo(widthD);
int nearestH = NearestPowerOfTwo(heightD);
int nearestD = NearestPowerOfTwo(depthD);
int nearestCount = nearestW * nearestH * nearestD;
CudaDeviceVariable<int> trisCountDevice = new CudaDeviceVariable<int>(nearestCount);
trisCountDevice.Memset(0);
CudaDeviceVariable<int> offsetsDev = new CudaDeviceVariable<int>(countDecremented);
kernelMarchingCubesCases.BlockDimensions = blockDimensions;
typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesCases, gridDimensionsDecremented);
kernelMarchingCubesCases.Run(voxelsDev.DevicePointer, width, height, depth, offsetsDev.DevicePointer, trisCountDevice.DevicePointer, nearestW, nearestH, nearestD);
CudaDeviceVariable<int> prefixSumsDev = prefixScan.PrefixSumArray(trisCountDevice, nearestCount);
// Read the last element of each buffer (byte offset (nearestCount - 1) * sizeof(int)).
// NOTE(review): adding last count + last prefix sum implies the scan is exclusive - confirm.
int lastTrisCount = 0;
trisCountDevice.CopyToHost(ref lastTrisCount, (nearestCount - 1) * sizeof(int));
int lastPrefixSum = 0;
prefixSumsDev.CopyToHost(ref lastPrefixSum, (nearestCount - 1) * sizeof(int));
// Three vertices per triangle.
int totalVerticesCount = (lastTrisCount + lastPrefixSum) * 3;
if (totalVerticesCount > 0)
{
// Replace any previous geometry with a buffer sized for the new vertex count.
if (container.Geometry != null)
container.Geometry.Dispose();
container.VertexCount = totalVerticesCount;
container.Geometry = new Buffer(graphicsDevice, new BufferDescription()
{
BindFlags = BindFlags.VertexBuffer,
CpuAccessFlags = CpuAccessFlags.None,
OptionFlags = ResourceOptionFlags.None,
SizeInBytes = Marshal.SizeOf(typeof(VoxelMeshVertex)) * totalVerticesCount,
Usage = ResourceUsage.Default
});
// Register the D3D11 buffer with CUDA and map it so the kernel can write into it.
CudaDirectXInteropResource directResource = new CudaDirectXInteropResource(container.Geometry.ComPointer, CUGraphicsRegisterFlags.None, CudaContext.DirectXVersion.D3D11, CUGraphicsMapResourceFlags.None);
kernelMarchingCubesVertices.BlockDimensions = blockDimensions;
typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesVertices, gridDimensionsDecremented);
directResource.Map();
kernelMarchingCubesVertices.Run(directResource.GetMappedPointer(), voxelsDev.DevicePointer, prefixSumsDev.DevicePointer, offsetsDev.DevicePointer, width, height, depth, nearestW, nearestH, nearestD);
directResource.UnMap();
directResource.Dispose();
}
else
{
// Nothing to draw: drop the old geometry.
// NOTE(review): container.Geometry is disposed here but not set to null, so a later call
// will dispose the same object again - confirm that is harmless.
container.VertexCount = 0;
if (container.Geometry != null)
container.Geometry.Dispose();
}
// Release every temporary CUDA resource created above.
noiseCube.Dispose();
prefixSumsDev.Dispose();
trisCountDevice.Dispose();
offsetsDev.Dispose();
noiseArray.Dispose();
noiseTexture.Dispose();
voxelsDev.Dispose();
}
示例8: Main
/// <summary>
/// Compiles <c>vectorAdd_kernel.cu</c> at run time, launches the <c>vectorAdd</c> kernel on
/// two random 50000-element vectors and checks the device result against the host sum.
/// </summary>
/// <param name="args">Forwarded unchanged to <c>CudaRuntimeCompiler.Compile</c> as compiler options.</param>
static void Main(string[] args)
{
    string filename = "vectorAdd_kernel.cu"; //we assume the file is in the same folder...
    string fileToCompile = File.ReadAllText(filename);

    byte[] ptx;
    // Scope the compiler so it is released even if compilation throws.
    using (CudaRuntimeCompiler rtc = new CudaRuntimeCompiler(fileToCompile, "vectorAdd_kernel"))
    {
        rtc.Compile(args);
        string log = rtc.GetLogAsString();
        Console.WriteLine(log);
        ptx = rtc.GetPTX();
    }

    // `using` guarantees the context and the device buffers are released on every exit
    // path, including the early return when verification fails.
    using (CudaContext ctx = new CudaContext(0))
    {
        CudaKernel vectorAdd = ctx.LoadKernelPTX(ptx, "vectorAdd");

        // Print the vector length to be used
        int numElements = 50000;
        Console.WriteLine("[Vector addition of {0} elements]", numElements);

        // Allocate the host input vectors A and B and the host output vector C
        float[] h_A = new float[numElements];
        float[] h_B = new float[numElements];
        float[] h_C = new float[numElements];

        // Initialize the host input vectors (fixed seed, so runs are repeatable)
        Random rand = new Random(0);
        for (int i = 0; i < numElements; ++i)
        {
            h_A[i] = (float)rand.NextDouble();
            h_B[i] = (float)rand.NextDouble();
        }

        Console.WriteLine("Allocate and copy input data from the host memory to the CUDA device\n");

        // The implicit conversions allocate device vectors A and B and copy the host data;
        // device output vector C is only allocated.
        using (CudaDeviceVariable<float> d_A = h_A)
        using (CudaDeviceVariable<float> d_B = h_B)
        using (CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(numElements))
        {
            // Launch the Vector Add CUDA Kernel with enough blocks to cover every element
            int threadsPerBlock = 256;
            int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
            Console.WriteLine("CUDA kernel launch with {0} blocks of {1} threads\n", blocksPerGrid, threadsPerBlock);
            vectorAdd.BlockDimensions = new dim3(threadsPerBlock, 1, 1);
            vectorAdd.GridDimensions = new dim3(blocksPerGrid, 1, 1);
            vectorAdd.Run(d_A.DevicePointer, d_B.DevicePointer, d_C.DevicePointer, numElements);

            // Copy the device result vector in device memory to the host result vector
            // in host memory.
            Console.WriteLine("Copy output data from the CUDA device to the host memory\n");
            d_C.CopyToHost(h_C);

            // Verify that the result vector is correct
            for (int i = 0; i < numElements; ++i)
            {
                if (Math.Abs(h_A[i] + h_B[i] - h_C[i]) > 1e-5)
                {
                    Console.WriteLine("Result verification failed at element {0}!\n", i);
                    return;
                }
            }

            Console.WriteLine("Test PASSED\n");
        }
    }

    Console.WriteLine("Done\n");
}
示例9: Forward
/// <summary>
/// Runs <c>cudnnConvolutionForward</c> on host-side <c>float</c> arrays.
/// The source, filter and destination arrays are uploaded to temporary device buffers,
/// the native call is issued through <c>Invoke</c>, and the destination buffer is
/// downloaded into <paramref name="destData"/>.
/// </summary>
/// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
/// <param name="srcData">Host input uploaded before the call.</param>
/// <param name="filter">Descriptor for <paramref name="filterData"/>.</param>
/// <param name="filterData">Host filter weights uploaded before the call.</param>
/// <param name="convolution">Convolution descriptor whose handle is passed to the native call.</param>
/// <param name="destTensor">Descriptor for <paramref name="destData"/>.</param>
/// <param name="destData">
/// Host array that receives the result. Its current contents are uploaded first so
/// that an accumulating call adds to these values.
/// </param>
/// <param name="accumulate">Accumulation mode forwarded to the native call.</param>
public void Forward(CudnnTensorDescriptor srcTensor, float[] srcData, CudnnFilterDescriptor filter, float[] filterData, CudnnConvolutionDescriptor convolution, CudnnTensorDescriptor destTensor, float[] destData, CudnnAccumulateResult accumulate)
{
    Contract.Requires(srcTensor != null);
    Contract.Requires(srcData != null);
    Contract.Requires(filter != null);
    Contract.Requires(filterData != null);
    Contract.Requires(convolution != null);
    Contract.Requires(destTensor != null);
    Contract.Requires(destData != null);

    ThrowIfNotInitialized();
    CheckIfCompatible(CudnnType.Float, srcTensor, destTensor, filter);

    using (var srcDataGpu = new CudaDeviceVariable<float>(srcData.Length))
    using (var filterDataGpu = new CudaDeviceVariable<float>(filterData.Length))
    using (var destDataGpu = new CudaDeviceVariable<float>(destData.Length))
    {
        srcDataGpu.CopyToDevice(srcData);
        filterDataGpu.CopyToDevice(filterData);

        // A freshly allocated device buffer is uninitialised. When the caller asks
        // cuDNN to accumulate, the destination is read as well as written, so seed it
        // with the caller's values. In overwrite mode the upload has no effect on the result.
        destDataGpu.CopyToDevice(destData);

        Invoke(() => CudnnNativeMethods.cudnnConvolutionForward(handle, srcTensor.Handle, srcDataGpu.DevicePointer, filter.Handle, filterDataGpu.DevicePointer, convolution.Handle, destTensor.Handle, destDataGpu.DevicePointer, accumulate));

        destDataGpu.CopyToHost(destData);
    }
}
示例10: BackwardFilter
/// <summary>
/// Runs <c>cudnnConvolutionBackwardFilter</c> on host-side <c>double</c> arrays.
/// The source, diff and gradient arrays are uploaded to temporary device buffers, the
/// native call is issued through <c>Invoke</c>, and the gradient buffer is downloaded
/// into <paramref name="gradientData"/>.
/// </summary>
/// <param name="srcTensor">Descriptor for <paramref name="srcData"/>.</param>
/// <param name="srcData">Host input uploaded before the call.</param>
/// <param name="diffTensor">Descriptor for <paramref name="diffData"/>.</param>
/// <param name="diffData">Host input uploaded before the call.</param>
/// <param name="convolution">Convolution descriptor whose handle is passed to the native call.</param>
/// <param name="gradient">Filter descriptor for <paramref name="gradientData"/>.</param>
/// <param name="gradientData">
/// Host array that receives the result. Its current contents are uploaded first so
/// that an accumulating call adds to these values.
/// </param>
/// <param name="accumulate">Accumulation mode forwarded to the native call.</param>
public void BackwardFilter(CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor diffTensor, double[] diffData, CudnnConvolutionDescriptor convolution, CudnnFilterDescriptor gradient, double[] gradientData, CudnnAccumulateResult accumulate)
{
    Contract.Requires(srcTensor != null);
    Contract.Requires(srcData != null);
    Contract.Requires(diffTensor != null);
    Contract.Requires(diffData != null);
    Contract.Requires(convolution != null);
    Contract.Requires(gradient != null);
    Contract.Requires(gradientData != null);

    ThrowIfNotInitialized();
    CheckIfCompatible(CudnnType.Double, srcTensor, diffTensor, gradient);

    using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
    using (var diffDataGpu = new CudaDeviceVariable<double>(diffData.Length))
    using (var gradientDataGpu = new CudaDeviceVariable<double>(gradientData.Length))
    {
        srcDataGpu.CopyToDevice(srcData);
        diffDataGpu.CopyToDevice(diffData);

        // A freshly allocated device buffer is uninitialised. When the caller asks
        // cuDNN to accumulate, the gradient buffer is read as well as written, so seed it
        // with the caller's values. In overwrite mode the upload has no effect on the result.
        gradientDataGpu.CopyToDevice(gradientData);

        Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardFilter(handle, srcTensor.Handle, srcDataGpu.DevicePointer, diffTensor.Handle, diffDataGpu.DevicePointer, convolution.Handle, gradient.Handle, gradientDataGpu.DevicePointer, accumulate));

        gradientDataGpu.CopyToHost(gradientData);
    }
}