当前位置: 首页>>代码示例>>C#>>正文


C# CudaDeviceVariable.Memset方法代码示例

本文整理汇总了C#中CudaDeviceVariable.Memset方法的典型用法代码示例。如果您正苦于以下问题:C# CudaDeviceVariable.Memset方法的具体用法?C# CudaDeviceVariable.Memset怎么用?C# CudaDeviceVariable.Memset使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在CudaDeviceVariable的用法示例。


在下文中一共展示了CudaDeviceVariable.Memset方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: Run

        public void Run(MatOperation operation, CudaDeviceVariable<float> A, int ACount, int AColumnHint, CudaDeviceVariable<float> B, int BCount, int BColumnHint, CudaDeviceVariable<float> Result, int ResultCount, int ResultColumnHint, float beta = 1.0f)
        {
            Result.Memset(BitConverter.ToUInt32(BitConverter.GetBytes(0.0f), 0));

            switch (operation)
            {
                case MatOperation.Multiplication:  // vectors/matrices have to be always in the correct dimesions!
                    if (BCount > 1 && ACount > 1 && BColumnHint == 1 && ACount / AColumnHint > 1 && BCount / BColumnHint == AColumnHint) //. A*vecB
                    {
                        MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                            AColumnHint, ACount / AColumnHint, 1.0f,
                            A, AColumnHint,
                            B, 1,
                            beta, Result, 1);
                    }
                    else if (ACount > 1 && BCount > 1 && ACount / AColumnHint == 1 && BColumnHint > 1 && BCount / BColumnHint == AColumnHint)  // vecA*B
                    {
                        MyCublasFactory.Instance.Gemv(Operation.NonTranspose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                            BColumnHint, BCount / BColumnHint, 1.0f,
                            B, BColumnHint,
                            A, 1,
                            beta, Result, 1);
                    }
                    else if (ACount / AColumnHint == 1 && BColumnHint == 1 && ACount > 1 && BCount > 1) //. trans(vecA) * vecB
                    {
                        Run(MatOperation.DotProd, A, ACount, AColumnHint, B, BCount, BColumnHint, Result, ResultCount, ResultColumnHint, beta);
                    }
                    else if (ACount != 1 || BCount != 1)// A*B   matrix multiplication
                    {
                        MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.NonTranspose,
                            ACount / AColumnHint, BColumnHint, AColumnHint, 1.0f,
                            A, ACount / AColumnHint,
                            B, BCount / BColumnHint,
                            beta, Result, ResultColumnHint);
                    }
                    break;
                case MatOperation.DotProd:
                    MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                       ACount, 1, 1.0f,
                       A, ACount,
                       B, 1,
                       beta, Result, 1);
                    break;
                default:
                    MyLog.Writer.WriteLine(MyLogLevel.ERROR, "Trying to run cublas for undefined MatOperation");
                    break;
            }
        }
开发者ID:Jlaird,项目名称:BrainSimulator,代码行数:48,代码来源:MyMatrixCublasOps.cs

示例2: Run

        public void Run(MatOperation operation, CudaDeviceVariable<float> A, int ACount, int AColumnHint, CudaDeviceVariable<float> B, int BCount, int BColumnHint, CudaDeviceVariable<float> Result, int ResultCount, int ResultColumnHint, float beta = 1.0f)
        {
            Result.Memset(BitConverter.ToUInt32(BitConverter.GetBytes(0.0f), 0));

            switch (operation)
            {
                case MatOperation.Multiplication:  // vectors/matrices have to be always in the correct dimesions!
                    if (BCount > 1 && ACount >= 1 && BColumnHint == 1 && ACount / AColumnHint > 1 && BCount / BColumnHint == AColumnHint) //. A*vecB
                    {
                        MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                            AColumnHint, ACount / AColumnHint, 1.0f,
                            A, AColumnHint,
                            B, 1,
                            beta, Result, 1);
                    }
                    else if (ACount >= 1 && BCount > 1 && ACount / AColumnHint == 1 && BColumnHint > 1 && BCount / BColumnHint == AColumnHint)  // vecA*B
                    {
                        MyCublasFactory.Instance.Gemv(Operation.NonTranspose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                            BColumnHint, BCount / BColumnHint, 1.0f,
                            B, BColumnHint,
                            A, 1,
                            beta, Result, 1);
                    }
                    else if (ACount / AColumnHint == 1 && BColumnHint == 1 && ACount > 1 && BCount > 1) //. trans(vecA) * vecB
                    {
                        Run(MatOperation.DotProd, A, ACount, AColumnHint, B, BCount, BColumnHint, Result, ResultCount, ResultColumnHint, beta);
                    }
                    else if (ACount != 1 || BCount != 1)// A*B   matrix multiplication
                    {
                        // Cublas is using fortran matrices.. thus tey have to be swapped such as described in: http://peterwittek.com/cublas-matrix-c-style.html
                        int m = BColumnHint;
                        int n = ACount / AColumnHint;
                        int k = AColumnHint;
                        int lda = BColumnHint;
                        int ldb = AColumnHint;
                        int ldc = ResultColumnHint;
                        MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.NonTranspose,
                            m, n, k, 1.0f,
                            B, lda,
                            A, ldb,
                            beta, Result, ldc);
                    }
                    break;
                case MatOperation.DotProd:

                    if (ACount != BCount || ResultCount != 1)
                    {
                        MyLog.Writer.WriteLine(MyLogLevel.ERROR, callee.Name + ": Inconsistent vector dimensions for MyMatrixCublasOps.");
                        break;
                    }

                    MyCublasFactory.Instance.Gemv(Operation.Transpose,  // transpose beacuase it does Ax row wise if x is a row vector :D
                       ACount, 1, 1.0f,
                       A, ACount,
                       B, 1,
                       beta, Result, 1);
                    break;
                default:
                    MyLog.Writer.WriteLine(MyLogLevel.ERROR, "Trying to run cublas for undefined MatOperation");
                    break;
            }
        }
开发者ID:sschocke,项目名称:BrainSimulator,代码行数:62,代码来源:MyMatrixCublasOps.cs

示例3: Reset

        protected override void Reset()
        {
            base.Reset();

            isScreenClear = false;

            // Allocate the history
            m_HistoryDeviceBuffer = new CudaDeviceVariable<float>(m_Rows * m_Cols);
            m_HistoryDeviceBuffer.Memset(0);

            m_History = new List<string>();
            m_History.Add("");
        }
开发者ID:steuerj,项目名称:BrainSimulator,代码行数:13,代码来源:MyTextObserver.cs

示例4: Generate

        private void Generate(CudaKernel kernelPositionWeight, int width, int height, int depth)
        {
            int count = width * height * depth;
            int widthD = width - 1;
            int heightD = height - 1;
            int depthD = depth - 1;
            int countDecremented = widthD * heightD * depthD;

            dim3 blockDimensions = new dim3(8, 8, 8);
            dim3 gridDimensions = new dim3((int)Math.Ceiling(width / 8.0), (int)Math.Ceiling(height / 8.0), (int)Math.Ceiling(depth / 8.0));
            dim3 gridDimensionsDecremented = new dim3((int)Math.Ceiling(widthD / 8.0), (int)Math.Ceiling(heightD / 8.0), (int)Math.Ceiling(depthD / 8.0));

            CUDANoiseCube noiseCube = new CUDANoiseCube();

            CudaArray3D noiseArray = noiseCube.GenerateUniformArray(16, 16, 16);
            CudaTextureArray3D noiseTexture = new CudaTextureArray3D(kernelPositionWeight, "noiseTexture", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates, noiseArray);

            CudaDeviceVariable<Voxel> voxelsDev = new CudaDeviceVariable<Voxel>(count);

            kernelPositionWeight.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelPositionWeight, gridDimensions);

            kernelPositionWeight.Run(voxelsDev.DevicePointer, width, height, depth);

            kernelNormalAmbient.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelNormalAmbient, gridDimensions);

            kernelNormalAmbient.Run(voxelsDev.DevicePointer, width, height, depth, container.Settings.AmbientRayWidth, container.Settings.AmbientSamplesCount);

            int nearestW = NearestPowerOfTwo(widthD);
            int nearestH = NearestPowerOfTwo(heightD);
            int nearestD = NearestPowerOfTwo(depthD);
            int nearestCount = nearestW * nearestH * nearestD;

            CudaDeviceVariable<int> trisCountDevice = new CudaDeviceVariable<int>(nearestCount);
            trisCountDevice.Memset(0);
            CudaDeviceVariable<int> offsetsDev = new CudaDeviceVariable<int>(countDecremented);

            kernelMarchingCubesCases.BlockDimensions = blockDimensions;
            typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesCases, gridDimensionsDecremented);

            kernelMarchingCubesCases.Run(voxelsDev.DevicePointer, width, height, depth, offsetsDev.DevicePointer, trisCountDevice.DevicePointer, nearestW, nearestH, nearestD);

            CudaDeviceVariable<int> prefixSumsDev = prefixScan.PrefixSumArray(trisCountDevice, nearestCount);

            int lastTrisCount = 0;
            trisCountDevice.CopyToHost(ref lastTrisCount, (nearestCount - 1) * sizeof(int));

            int lastPrefixSum = 0;
            prefixSumsDev.CopyToHost(ref lastPrefixSum, (nearestCount - 1) * sizeof(int));

            int totalVerticesCount = (lastTrisCount + lastPrefixSum) * 3;

            if (totalVerticesCount > 0)
            {
                if (container.Geometry != null)
                    container.Geometry.Dispose();

                container.VertexCount = totalVerticesCount;

                container.Geometry = new Buffer(graphicsDevice, new BufferDescription()
                {
                    BindFlags = BindFlags.VertexBuffer,
                    CpuAccessFlags = CpuAccessFlags.None,
                    OptionFlags = ResourceOptionFlags.None,
                    SizeInBytes = Marshal.SizeOf(typeof(VoxelMeshVertex)) * totalVerticesCount,
                    Usage = ResourceUsage.Default
                });

                CudaDirectXInteropResource directResource = new CudaDirectXInteropResource(container.Geometry.ComPointer, CUGraphicsRegisterFlags.None, CudaContext.DirectXVersion.D3D11, CUGraphicsMapResourceFlags.None);
                
                kernelMarchingCubesVertices.BlockDimensions = blockDimensions;
                typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesVertices, gridDimensionsDecremented);

                directResource.Map();
                kernelMarchingCubesVertices.Run(directResource.GetMappedPointer(), voxelsDev.DevicePointer, prefixSumsDev.DevicePointer, offsetsDev.DevicePointer, width, height, depth, nearestW, nearestH, nearestD);
                directResource.UnMap();

                directResource.Dispose();
            }
            else
            {
                container.VertexCount = 0;

                if (container.Geometry != null)
                    container.Geometry.Dispose();
            }

            noiseCube.Dispose();
            prefixSumsDev.Dispose();
            trisCountDevice.Dispose();
            offsetsDev.Dispose();
            noiseArray.Dispose();
            noiseTexture.Dispose();
            voxelsDev.Dispose();
        }
开发者ID:barograf,项目名称:VoxelTerrain,代码行数:96,代码来源:CUDAGenerator.cs

示例5: Main


//.........这里部分代码省略.........

            // create CUDA event handles
            // use blocking sync
            CudaEvent start_event, stop_event;
            CUEventFlags eventflags = ((device_sync_method == CUCtxFlags.BlockingSync) ? CUEventFlags.BlockingSync : CUEventFlags.Default);

            start_event = new CudaEvent(eventflags);
            stop_event = new CudaEvent(eventflags);

            // time memcopy from device
            start_event.Record();     // record in stream-0, to ensure that all previous CUDA calls have completed
            hAligned_a.AsyncCopyToDevice(d_a, streams[0].Stream);
            stop_event.Record();
            stop_event.Synchronize();   // block until the event is actually recorded
            time_memcpy = CudaEvent.ElapsedTime(start_event, stop_event);
            Console.Write("memcopy:\t{0:0.00}\n", time_memcpy);

            // time kernel
            threads = new dim3(512, 1);
            blocks = new dim3(n / (int)threads.x, 1);
            start_event.Record();
            init_array.BlockDimensions = threads;
            init_array.GridDimensions = blocks;
            init_array.RunAsync(streams[0].Stream, d_a.DevicePointer, d_c.DevicePointer, niterations);
            stop_event.Record();
            stop_event.Synchronize();
            time_kernel = CudaEvent.ElapsedTime(start_event, stop_event);
            Console.Write("kernel:\t\t{0:0.00}\n", time_kernel);

            //////////////////////////////////////////////////////////////////////
            // time non-streamed execution for reference
            threads = new dim3(512, 1);
            blocks = new dim3(n / (int)threads.x, 1);
            start_event.Record();
            for(int k = 0; k < nreps; k++)
            {
                init_array.BlockDimensions = threads;
                init_array.GridDimensions = blocks;
                init_array.Run(d_a.DevicePointer, d_c.DevicePointer, niterations);
                hAligned_a.SynchronCopyToHost(d_a);
            }
            stop_event.Record();
            stop_event.Synchronize();
            elapsed_time = CudaEvent.ElapsedTime(start_event, stop_event);
            Console.Write("non-streamed:\t{0:0.00} ({1:00} expected)\n", elapsed_time / nreps, time_kernel + time_memcpy);

            //////////////////////////////////////////////////////////////////////
            // time execution with nstreams streams
            threads = new dim3(512, 1);
            blocks = new dim3(n / (int)(nstreams * threads.x), 1);
            byte[] memset = new byte[nbytes]; // set host memory bits to all 1s, for testing correctness
            for (int i = 0; i < nbytes; i++)
            {
                memset[i] = 255;
            }
            System.Runtime.InteropServices.Marshal.Copy(memset, 0, hAligned_a.PinnedHostPointer, nbytes);
            d_a.Memset(0); // set device memory to all 0s, for testing correctness

            start_event.Record();
            for(int k = 0; k < nreps; k++)
            {
                init_array.BlockDimensions = threads;
                init_array.GridDimensions = blocks;
                // asynchronously launch nstreams kernels, each operating on its own portion of data
                for(int i = 0; i < nstreams; i++)
                    init_array.RunAsync(streams[i].Stream, d_a.DevicePointer + i * n / nstreams * sizeof(int), d_c.DevicePointer, niterations);

                // asynchronously launch nstreams memcopies.  Note that memcopy in stream x will only
                //   commence executing when all previous CUDA calls in stream x have completed
                for (int i = 0; i < nstreams; i++)
                    hAligned_a.AsyncCopyFromDevice(d_a, i * n / nstreams * sizeof(int), i * n / nstreams * sizeof(int), nbytes / nstreams, streams[i].Stream);
            }
            stop_event.Record();
            stop_event.Synchronize();
            elapsed_time = CudaEvent.ElapsedTime(start_event, stop_event);
            Console.Write("{0} streams:\t{1:0.00} ({2:0.00} expected with compute capability 1.1 or later)\n", nstreams, elapsed_time / nreps, time_kernel + time_memcpy / nstreams);

            // check whether the output is correct
            Console.Write("-------------------------------\n");
            //We can directly access data in hAligned_a using the [] operator, but copying
            //data first to h_a is faster.
            System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, nbytes / sizeof(int));

            bool bResults = correct_data(h_a, n, c*nreps*niterations);

            // release resources
            for(int i = 0; i < nstreams; i++) {
                streams[i].Dispose();
            }
            start_event.Dispose();
            stop_event.Dispose();

            hAligned_a.Dispose();
            d_a.Dispose();
            d_c.Dispose();
            CudaContext.ProfilerStop();
            ctx.Dispose();

            ShrQATest.shrQAFinishExit(args, bResults ? ShrQATest.eQAstatus.QA_PASSED : ShrQATest.eQAstatus.QA_FAILED);
        }
开发者ID:kunzmi,项目名称:managedCuda,代码行数:101,代码来源:Program.cs

示例6: Reset

        protected override void Reset()
        {
            TextureHeight = BIN_PIXEL_HEIGHT;
            TextureWidth = BIN_PIXEL_WIDTH * BINS;

            if (m_d_HistogramData != null)
            {
                m_d_HistogramData.Dispose();
            }
            m_d_HistogramData = new CudaDeviceVariable<int>(BINS);
            m_d_HistogramData.Memset(0);
        }
开发者ID:Jlaird,项目名称:BrainSimulator,代码行数:12,代码来源:MyHistogramObserver.cs


注:本文中的CudaDeviceVariable.Memset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。