

C# CudaDeviceVariable.CopyToHost Method Code Examples

This article collects typical usage examples of the CudaDeviceVariable.CopyToHost method in C#. If you are wondering how to call CudaDeviceVariable.CopyToHost, or what the method looks like in real-world code, the hand-picked examples below should help. You can also explore further usage examples of the CudaDeviceVariable class that the method belongs to.


The following shows 10 code examples of the CudaDeviceVariable.CopyToHost method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C# code examples.
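Before the individual examples, here is a minimal sketch of the round-trip pattern they all share: allocate a CudaDeviceVariable&lt;T&gt;, copy host data to the device with CopyToDevice, have a kernel or library call write into the device buffer, and read the result back with CopyToHost. This sketch is not taken from any of the projects below; it assumes the managedCuda library is referenced, and the class and variable names are illustrative only.

        using System;
        using ManagedCuda;

        static class CopyToHostSketch
        {
            static void Main()
            {
                using (var ctx = new CudaContext(0))                       // primary context on GPU 0
                using (var devData = new CudaDeviceVariable<float>(256))   // 256 floats in device memory
                {
                    var host = new float[256];
                    for (int i = 0; i < host.Length; i++)
                        host[i] = i;

                    devData.CopyToDevice(host);   // host -> device
                    // ... launch a kernel or call a library routine that writes devData ...
                    devData.CopyToHost(host);     // device -> host, overwrites the host array

                    Console.WriteLine(host[255]);
                    // the using blocks dispose devData and ctx, freeing the device memory
                }
            }
        }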

Example 1: Backward

        public void Backward(CudnnSoftmaxAlgorithm algorithm, CudnnSoftmaxMode mode,
                             CudnnTensorDescriptor srcTensor, float[] srcData, CudnnTensorDescriptor srcDiffTensor, float[] srcDiffData,
                             CudnnTensorDescriptor destDiffTensor, float[] destDiffData)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Float, srcTensor, srcDiffTensor, destDiffTensor);

            using (var srcDataGpu = new CudaDeviceVariable<float>(srcData.Length))
            using (var srcDiffDataGpu = new CudaDeviceVariable<float>(srcDiffData.Length))
            using (var destDiffDataGpu = new CudaDeviceVariable<float>(destDiffData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);
                srcDiffDataGpu.CopyToDevice(srcDiffData);

                Invoke(() => CudnnNativeMethods.cudnnSoftmaxBackward(handle, algorithm, mode,
                                                                     srcTensor.Handle, srcDataGpu.DevicePointer, srcDiffTensor.Handle, srcDiffDataGpu.DevicePointer,
                                                                     destDiffTensor.Handle, destDiffDataGpu.DevicePointer));
                destDiffDataGpu.CopyToHost(destDiffData);
            }
        }
Developer: redknightlois, Project: cudalearn, Lines of code: 27, Source file: CudnnContext.Softmax.cs

Example 2: Test

 static void Test(byte[] ptxFile)
 {
     const int size = 16;
     var context = new CudaContext();
     var kernel = context.LoadKernelPTX(ptxFile, "kernel");
     var memory = context.AllocateMemory(4 * size);
     var gpuMemory = new CudaDeviceVariable<int>(memory);
     var cpuMemory = new int[size];
     for (var i = 0; i < size; i++)
         cpuMemory[i] = i - 2;
     gpuMemory.CopyToDevice(cpuMemory);
     kernel.BlockDimensions = 4;
     kernel.GridDimensions = 4;
     kernel.Run(memory);
     gpuMemory.CopyToHost(cpuMemory);
     for (var i = 0; i < size; i++)
         Console.WriteLine("{0} = {1}", i, cpuMemory[i]);
 }
Developer: shaunstanislaus, Project: CudaSharp, Lines of code: 18, Source file: Program.cs

Example 3: BackwardBias

        public void BackwardBias(CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor destTensor, double[] destData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, destTensor);

            using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
            using (var destDataGpu = new CudaDeviceVariable<double>(destData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);

                Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardBias(handle, srcTensor.Handle, srcDataGpu.DevicePointer, destTensor.Handle, destDataGpu.DevicePointer, accumulate));
                destDataGpu.CopyToHost(destData);
            }
        }
Developer: redknightlois, Project: cudalearn, Lines of code: 19, Source file: CudnnContext.Convolution.cs

Example 4: Run

        public void Run(DistanceOperation operation,
            CudaDeviceVariable<float> A, int sizeA,
            CudaDeviceVariable<float> B, int sizeB,
            CudaDeviceVariable<float> result, int sizeRes)
        {
            if (!ValidateAtRun(operation))
                return;

            switch (operation)
            {
                case DistanceOperation.DotProd:
                    //ZXC m_dotKernel.Run(result.DevicePointer, 0, A.DevicePointer, B.DevicePointer, sizeA, 0);
                    m_dotKernel.Run(result.DevicePointer, A.DevicePointer, B.DevicePointer, sizeA);
                    break;

                case DistanceOperation.CosDist:
                    //ZXC m_cosKernel.Run(result.DevicePointer, 0, A.DevicePointer, B.DevicePointer, sizeA, 0);
                    m_cosKernel.Run(result.DevicePointer, A.DevicePointer, B.DevicePointer, sizeA);
                    break;

                case DistanceOperation.EuclidDist:
                    float res = RunReturn(operation, A, sizeA, B, sizeB);
                    result.CopyToDevice(res);
                    break;

                case DistanceOperation.EuclidDistSquared:
                    m_combineVecsKernel.SetupExecution(sizeA);
                    m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Subtraction, sizeA);
                    //ZXC m_dotKernel.Run(result.DevicePointer, 0, m_temp, m_temp, m_temp.Count, 0);
                    m_dotKernel.Run(result.DevicePointer, m_temp, m_temp);
                    break;

                case DistanceOperation.HammingDist:
                    m_combineVecsKernel.SetupExecution(sizeA);
                    m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Equal, sizeA);
                    //ZXC m_reduceSumKernel.Run(result.DevicePointer, m_temp, m_temp.Count, 0, 0, 1, /*distributed = false*/0); // reduction to a single number
                    m_reduceSumKernel.Run(result.DevicePointer, m_temp);
                    float fDist = 0; // to transform number of matches to a number of differences
                    result.CopyToHost(ref fDist);
                    fDist = m_temp.Count - fDist;
                    result.CopyToDevice(fDist);
                    break;

                case DistanceOperation.HammingSim:
                    m_combineVecsKernel.SetupExecution(sizeA);
                    m_combineVecsKernel.Run(A.DevicePointer, B.DevicePointer, m_temp, (int)MyJoin.MyJoinOperation.Equal, sizeA);
                    //ZXC m_reduceSumKernel.Run(result.DevicePointer, m_temp, m_temp.Count, 0, 0, 1, /*distributed = false*/0); // reduction to a single number
                    m_reduceSumKernel.Run(result.DevicePointer, m_temp);
                    // take the single number (number of different bits) and convert it to Hamming Similarity:
                    // a number in range <0,1> that says how much the vectors are similar
                    float fSim = 0;
                    result.CopyToHost(ref fSim);
                    fSim = fSim / m_temp.Count;
                    result.CopyToDevice(fSim);
                    break;
            }
        }
Developer: sschocke, Project: BrainSimulator, Lines of code: 57, Source file: MyDistanceOps.cs

Example 5: Backward

        public void Backward(CudnnPoolingDescriptor pooling, CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor srcDiffTensor, double[] srcDiffData,
                                                             CudnnTensorDescriptor destTensor, double[] destData, CudnnTensorDescriptor destDiffTensor, double[] destDiffData)
        {
            Contract.Requires(pooling != null);
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);
            Contract.Requires(srcDiffTensor != null);
            Contract.Requires(srcDiffData != null);
            Contract.Requires(destDiffTensor != null);
            Contract.Requires(destDiffData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, srcDiffTensor, destTensor, destDiffTensor);

            using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
            using (var srcDiffDataGpu = new CudaDeviceVariable<double>(srcDiffData.Length))
            using (var destDataGpu = new CudaDeviceVariable<double>(destData.Length))
            using (var destDiffDataGpu = new CudaDeviceVariable<double>(destDiffData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);
                srcDiffDataGpu.CopyToDevice(srcDiffData);
                destDataGpu.CopyToDevice(destData);

                Invoke(() => CudnnNativeMethods.cudnnPoolingBackward(handle, pooling.Handle,
                                                                     srcTensor.Handle, srcDataGpu.DevicePointer, srcDiffTensor.Handle, srcDiffDataGpu.DevicePointer,
                                                                     destTensor.Handle, destDataGpu.DevicePointer, destDiffTensor.Handle, destDiffDataGpu.DevicePointer));
                destDiffDataGpu.CopyToHost(destDiffData);
            }
        }
Developer: redknightlois, Project: cudalearn, Lines of code: 31, Source file: CudnnContext.Pooling.cs

Example 6: SaveJpeg


//......... part of the code is omitted here .........
                nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
            }

            for (int i = 0; i < oFrameHeader.nComponents; ++i)
            {
                NppiSize oBlocks = new NppiSize();
                NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

                oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 *
                                          (float)(oBlocksPerMCU.width) / nMCUBlocksH);
                oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

                oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 *
                                           (float)(oBlocksPerMCU.height) / nMCUBlocksV);
                oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

                // Allocate Memory
                apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);

            }

            /***************************
            *
            *   Output
            *
            ***************************/

            // Forward DCT
            for (int i = 0; i < 3; ++i)
            {
                compression.DCTQuantFwd8x8LS(apDstImage[i], apdDCT[i], aDstSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
            }

            // Huffman Encoding
            CudaDeviceVariable<byte> pdScan = new CudaDeviceVariable<byte>(BUFFER_SIZE);
            int nScanLength = 0;

            int nTempSize = JPEGCompression.EncodeHuffmanGetSize(aDstSize[0], 3);
            CudaDeviceVariable<byte> pJpegEncoderTemp = new CudaDeviceVariable<byte>(nTempSize);

            NppiEncodeHuffmanSpec[] apHuffmanDCTableEnc = new NppiEncodeHuffmanSpec[3];
            NppiEncodeHuffmanSpec[] apHuffmanACTableEnc = new NppiEncodeHuffmanSpec[3];

            for (int i = 0; i < 3; ++i)
            {
                apHuffmanDCTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                apHuffmanACTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
            }

            JPEGCompression.EncodeHuffmanScan(apdDCT, 0, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, pdScan, ref nScanLength, apHuffmanDCTableEnc, apHuffmanACTableEnc, aDstSize, pJpegEncoderTemp);

            for (int i = 0; i < 3; ++i)
            {
                JPEGCompression.EncodeHuffmanSpecFree(apHuffmanDCTableEnc[i]);
                JPEGCompression.EncodeHuffmanSpecFree(apHuffmanACTableEnc[i]);
            }

            // Write JPEG to byte array, as in original sample code
            byte[] pDstOutput = new byte[BUFFER_SIZE];
            int pos = 0;

            oFrameHeader.nWidth = (ushort)oDstImageSize.width;
            oFrameHeader.nHeight = (ushort)oDstImageSize.height;

            writeMarker(0x0D8, pDstOutput, ref pos);
            writeJFIFTag(pDstOutput, ref pos);
            writeQuantizationTable(aQuantizationTables[0], pDstOutput, ref pos);
            writeQuantizationTable(aQuantizationTables[1], pDstOutput, ref pos);
            writeFrameHeader(oFrameHeader, pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[0], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[1], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[2], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[3], pDstOutput, ref pos);
            writeScanHeader(oScanHeader, pDstOutput, ref pos);

            pdScan.CopyToHost(pDstOutput, 0, pos, nScanLength);

            pos += nScanLength;
            writeMarker(0x0D9, pDstOutput, ref pos);

            FileStream fs = new FileStream(aFilename, FileMode.Create, FileAccess.Write);
            fs.Write(pDstOutput, 0, pos);
            fs.Close();

            //cleanup:
            fs.Dispose();
            pJpegEncoderTemp.Dispose();
            pdScan.Dispose();
            apdDCT[2].Dispose();
            apdDCT[1].Dispose();
            apdDCT[0].Dispose();
            pdQuantizationTables[1].Dispose();
            pdQuantizationTables[0].Dispose();

            srcCr.Dispose();
            srcCb.Dispose();
            srcY.Dispose();
            src.Dispose();
            compression.Dispose();
        }
Developer: kunzmi, Project: managedCuda, Lines of code: 101, Source file: JpegNPP.cs

Example 7: Generate

        private void Generate(CudaKernel kernelPositionWeight, int width, int height, int depth)
        {
            int count = width * height * depth;
            int widthD = width - 1;
            int heightD = height - 1;
            int depthD = depth - 1;
            int countDecremented = widthD * heightD * depthD;

            dim3 blockDimensions = new dim3(8, 8, 8);
            dim3 gridDimensions = new dim3((int)Math.Ceiling(width / 8.0), (int)Math.Ceiling(height / 8.0), (int)Math.Ceiling(depth / 8.0));
            dim3 gridDimensionsDecremented = new dim3((int)Math.Ceiling(widthD / 8.0), (int)Math.Ceiling(heightD / 8.0), (int)Math.Ceiling(depthD / 8.0));

            CUDANoiseCube noiseCube = new CUDANoiseCube();

            CudaArray3D noiseArray = noiseCube.GenerateUniformArray(16, 16, 16);
            CudaTextureArray3D noiseTexture = new CudaTextureArray3D(kernelPositionWeight, "noiseTexture", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates, noiseArray);

            CudaDeviceVariable<Voxel> voxelsDev = new CudaDeviceVariable<Voxel>(count);

            kernelPositionWeight.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelPositionWeight, gridDimensions);

            kernelPositionWeight.Run(voxelsDev.DevicePointer, width, height, depth);

            kernelNormalAmbient.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelNormalAmbient, gridDimensions);

            kernelNormalAmbient.Run(voxelsDev.DevicePointer, width, height, depth, container.Settings.AmbientRayWidth, container.Settings.AmbientSamplesCount);

            int nearestW = NearestPowerOfTwo(widthD);
            int nearestH = NearestPowerOfTwo(heightD);
            int nearestD = NearestPowerOfTwo(depthD);
            int nearestCount = nearestW * nearestH * nearestD;

            CudaDeviceVariable<int> trisCountDevice = new CudaDeviceVariable<int>(nearestCount);
            trisCountDevice.Memset(0);
            CudaDeviceVariable<int> offsetsDev = new CudaDeviceVariable<int>(countDecremented);

            kernelMarchingCubesCases.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesCases, gridDimensionsDecremented);

            kernelMarchingCubesCases.Run(voxelsDev.DevicePointer, width, height, depth, offsetsDev.DevicePointer, trisCountDevice.DevicePointer, nearestW, nearestH, nearestD);

            CudaDeviceVariable<int> prefixSumsDev = prefixScan.PrefixSumArray(trisCountDevice, nearestCount);

            int lastTrisCount = 0;
            trisCountDevice.CopyToHost(ref lastTrisCount, (nearestCount - 1) * sizeof(int));

            int lastPrefixSum = 0;
            prefixSumsDev.CopyToHost(ref lastPrefixSum, (nearestCount - 1) * sizeof(int));

            int totalVerticesCount = (lastTrisCount + lastPrefixSum) * 3;

            if (totalVerticesCount > 0)
            {
                if (container.Geometry != null)
                    container.Geometry.Dispose();

                container.VertexCount = totalVerticesCount;

                container.Geometry = new Buffer(graphicsDevice, new BufferDescription()
                {
                    BindFlags = BindFlags.VertexBuffer,
                    CpuAccessFlags = CpuAccessFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    SizeInBytes = Marshal.SizeOf(typeof(VoxelMeshVertex)) * totalVerticesCount,
                    Usage = ResourceUsage.Default
                });

                CudaDirectXInteropResource directResource = new CudaDirectXInteropResource(container.Geometry.ComPointer, CUGraphicsRegisterFlags.None, CudaContext.DirectXVersion.D3D11, CUGraphicsMapResourceFlags.None);
                
                kernelMarchingCubesVertices.BlockDimensions = blockDimensions;
                typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesVertices, gridDimensionsDecremented);

                directResource.Map();
                kernelMarchingCubesVertices.Run(directResource.GetMappedPointer(), voxelsDev.DevicePointer, prefixSumsDev.DevicePointer, offsetsDev.DevicePointer, width, height, depth, nearestW, nearestH, nearestD);
                directResource.UnMap();

                directResource.Dispose();
            }
            else
            {
                container.VertexCount = 0;

                if (container.Geometry != null)
                    container.Geometry.Dispose();
            }

            noiseCube.Dispose();
            prefixSumsDev.Dispose();
            trisCountDevice.Dispose();
            offsetsDev.Dispose();
            noiseArray.Dispose();
            noiseTexture.Dispose();
            voxelsDev.Dispose();
        }
Developer: barograf, Project: VoxelTerrain, Lines of code: 96, Source file: CUDAGenerator.cs

Example 8: Main

        static void Main(string[] args)
        {
            string filename = "vectorAdd_kernel.cu"; //we assume the file is in the same folder...
            string fileToCompile = File.ReadAllText(filename);

            CudaRuntimeCompiler rtc = new CudaRuntimeCompiler(fileToCompile, "vectorAdd_kernel");

            rtc.Compile(args);

            string log = rtc.GetLogAsString();

            Console.WriteLine(log);

            byte[] ptx = rtc.GetPTX();

            rtc.Dispose();

            CudaContext ctx = new CudaContext(0);

            CudaKernel vectorAdd = ctx.LoadKernelPTX(ptx, "vectorAdd");

            // Print the vector length to be used, and compute its size
            int numElements = 50000;
            SizeT size = numElements * sizeof(float);
            Console.WriteLine("[Vector addition of {0} elements]", numElements);

            // Allocate the host input vector A
            float[] h_A = new float[numElements];
            // Allocate the host input vector B
            float[] h_B = new float[numElements];
            // Allocate the host output vector C
            float[] h_C = new float[numElements];

            Random rand = new Random(0);

            // Initialize the host input vectors
            for (int i = 0; i < numElements; ++i)
            {
                h_A[i] = (float)rand.NextDouble();
                h_B[i] = (float)rand.NextDouble();
            }

            Console.WriteLine("Allocate and copy input data from the host memory to the CUDA device\n");
            // Allocate the device input vector A and copy to device
            CudaDeviceVariable<float> d_A = h_A;

            // Allocate the device input vector B and copy to device
            CudaDeviceVariable<float> d_B = h_B;

            // Allocate the device output vector C
            CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(numElements);

            // Launch the Vector Add CUDA Kernel
            int threadsPerBlock = 256;
            int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
            Console.WriteLine("CUDA kernel launch with {0} blocks of {1} threads\n", blocksPerGrid, threadsPerBlock);
            vectorAdd.BlockDimensions = new dim3(threadsPerBlock,1, 1);
            vectorAdd.GridDimensions = new dim3(blocksPerGrid, 1, 1);

            vectorAdd.Run(d_A.DevicePointer, d_B.DevicePointer, d_C.DevicePointer, numElements);

            // Copy the device result vector in device memory to the host result vector
            // in host memory.
            Console.WriteLine("Copy output data from the CUDA device to the host memory\n");
            d_C.CopyToHost(h_C);

            // Verify that the result vector is correct
            for (int i = 0; i < numElements; ++i)
            {
                if (Math.Abs(h_A[i] + h_B[i] - h_C[i]) > 1e-5)
                {
                    Console.WriteLine("Result verification failed at element {0}!\n", i);
                    return;
                }
            }

            Console.WriteLine("Test PASSED\n");

            // Free device global memory
            d_A.Dispose();
            d_B.Dispose();
            d_C.Dispose();

            ctx.Dispose();
            Console.WriteLine("Done\n");
        }
Developer: kunzmi, Project: managedCuda, Lines of code: 86, Source file: Program.cs

Example 9: Forward

        public void Forward(CudnnTensorDescriptor srcTensor, float[] srcData, CudnnFilterDescriptor filter, float[] filterData, CudnnConvolutionDescriptor convolution, CudnnTensorDescriptor destTensor, float[] destData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(filter != null);
            Contract.Requires(filterData != null);
            Contract.Requires(convolution != null);
            Contract.Requires(destTensor != null);
            Contract.Requires(destData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Float, srcTensor, destTensor, filter);

            using (var srcDataGpu = new CudaDeviceVariable<float>(srcData.Length))
            using (var filterDataGpu = new CudaDeviceVariable<float>(filterData.Length))
            using (var destDataGpu = new CudaDeviceVariable<float>(destData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);
                filterDataGpu.CopyToDevice(filterData);

                Invoke(() => CudnnNativeMethods.cudnnConvolutionForward(handle, srcTensor.Handle, srcDataGpu.DevicePointer, filter.Handle, filterDataGpu.DevicePointer, convolution.Handle, destTensor.Handle, destDataGpu.DevicePointer, accumulate));
                destDataGpu.CopyToHost(destData);
            }
        }
Developer: redknightlois, Project: cudalearn, Lines of code: 24, Source file: CudnnContext.Convolution.cs

Example 10: BackwardFilter

        public void BackwardFilter(CudnnTensorDescriptor srcTensor, double[] srcData, CudnnTensorDescriptor diffTensor, double[] diffData, CudnnConvolutionDescriptor convolution, CudnnFilterDescriptor gradient, double[] gradientData, CudnnAccumulateResult accumulate)
        {
            Contract.Requires(srcTensor != null);
            Contract.Requires(srcData != null);
            Contract.Requires(diffTensor != null);
            Contract.Requires(diffData != null);
            Contract.Requires(convolution != null);
            Contract.Requires(gradient != null);
            Contract.Requires(gradientData != null);

            ThrowIfNotInitialized();
            CheckIfCompatible(CudnnType.Double, srcTensor, diffTensor, gradient);

            using (var srcDataGpu = new CudaDeviceVariable<double>(srcData.Length))
            using (var diffDataGpu = new CudaDeviceVariable<double>(diffData.Length))
            using (var gradientDataGpu = new CudaDeviceVariable<double>(gradientData.Length))
            {
                srcDataGpu.CopyToDevice(srcData);
                diffDataGpu.CopyToDevice(diffData);

                Invoke(() => CudnnNativeMethods.cudnnConvolutionBackwardFilter(handle, srcTensor.Handle, srcDataGpu.DevicePointer, diffTensor.Handle, diffDataGpu.DevicePointer, convolution.Handle, gradient.Handle, gradientDataGpu.DevicePointer, accumulate));
                gradientDataGpu.CopyToHost(gradientData);
            }
        }
Developer: redknightlois, Project: cudalearn, Lines of code: 24, Source file: CudnnContext.Convolution.cs


Note: The CudaDeviceVariable.CopyToHost examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors. For redistribution and use, please refer to the license of the corresponding project; do not reproduce without permission.