当前位置: 首页>>代码示例>>C#>>正文


C# Context.CreateBuffer方法代码示例

本文整理汇总了C#中Context.CreateBuffer方法的典型用法代码示例。如果您正苦于以下问题:C# Context.CreateBuffer方法的具体用法?C# Context.CreateBuffer怎么用?C# Context.CreateBuffer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在Context的用法示例。


在下文中一共展示了Context.CreateBuffer方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。

示例1: TestHostToDeviceTransferPinned

        /// <summary>
        /// Measures host-to-device copy throughput when the host-side source is a buffer
        /// created with <c>MemoryFlags.AllocateHostPointer</c>.
        /// </summary>
        /// <param name="context">Context used to create the host-side and device-side buffers.</param>
        /// <param name="commandQueue">Queue on which every map, unmap and write command is enqueued.</param>
        /// <param name="memSize">Size of one transfer, in bytes.</param>
        /// <param name="accessMode">
        /// <c>AccessMode.Direct</c> copies with <c>EnqueueWriteBuffer</c>; any other value copies
        /// with <c>CopyMemory</c> through a mapped pointer to the device buffer.
        /// </param>
        /// <returns>
        /// <c>2.0 * memSize * MemoryCopyIterations / (elapsedSeconds * 2^20)</c>, i.e. a figure in MB/s
        /// that already includes the factor of two described in the comment above the formula.
        /// </returns>
        private double TestHostToDeviceTransferPinned(Context context, CommandQueue commandQueue, int memSize, AccessMode accessMode)
        {
            // Create a host buffer
            using (Buffer pinnedData = context.CreateBuffer(MemoryFlags.ReadWrite | MemoryFlags.AllocateHostPointer, memSize))
            {
                // get a mapped pointer
                IntPtr h_data;
                commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Write, 0, memSize, out h_data);

                // initialize with a repeating 0..255 pattern; the truncation is made explicit so the
                // loop does not throw OverflowException when the project is compiled with /checked
                for (int i = 0; i < memSize; i++)
                    Marshal.WriteByte(h_data, i, unchecked((byte)i));

                // unmap and make data in the host buffer valid
                commandQueue.EnqueueUnmapMemObject(pinnedData, h_data);

                // allocate device memory
                using (Buffer deviceData = context.CreateBuffer(MemoryFlags.ReadWrite, memSize))
                {
                    // sync queue to host
                    commandQueue.Finish();
                    Stopwatch timer = Stopwatch.StartNew();
                    if (accessMode == AccessMode.Direct)
                    {
                        commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Read, 0, memSize, out h_data);

                        // DIRECT: API access to device buffer
                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            // The object returned by EnqueueWriteBuffer is disposable; release it
                            // right away instead of leaking one per iteration.
                            using (commandQueue.EnqueueWriteBuffer(deviceData, false, 0, memSize, h_data))
                            {
                            }
                        }

                        commandQueue.Finish();
                    }
                    else
                    {
                        // MAPPED: mapped pointers to device buffer for conventional pointer access
                        IntPtr dm_idata;
                        commandQueue.EnqueueMapBuffer(deviceData, true, MapFlags.Write, 0, memSize, out dm_idata);
                        commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Read, 0, memSize, out h_data);

                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            CopyMemory(dm_idata, h_data, (UIntPtr)memSize);
                        }

                        commandQueue.EnqueueUnmapMemObject(deviceData, dm_idata);
                    }

                    // get the elapsed time in seconds
                    double elapsedTimeInSeconds = timer.Elapsed.TotalSeconds;

                    // Both branches map pinnedData for reading; release that mapping (outside the
                    // timed region) before the buffer is disposed, mirroring the map/unmap pair used
                    // for initialization above.
                    commandQueue.EnqueueUnmapMemObject(pinnedData, h_data);
                    commandQueue.Finish();

                    // Calculate bandwidth in MB/s
                    //      This is for kernels that read and write GMEM simultaneously
                    //      Obtained Throughput for unidirectional block copies will be 1/2 of this #
                    // NOTE(review): this is a one-directional host-to-device copy, so the 2.0 factor
                    // may overstate the result - confirm against the intended reporting convention.
                    double bandwidthInMBs = 2.0 * ((double)memSize * (double)MemoryCopyIterations) / (elapsedTimeInSeconds * (double)(1 << 20));

                    return bandwidthInMBs;
                }
            }
        }
开发者ID:gitter-badger,项目名称:NOpenCL,代码行数:61,代码来源:Bandwidth.cs

示例2: TestDeviceToDeviceTransfer

        /// <summary>
        /// Times <c>MemoryCopyIterations</c> buffer-to-buffer copies of <paramref name="memorySize"/>
        /// bytes on <paramref name="commandQueue"/> and converts the result to MB/s.
        /// </summary>
        /// <param name="context">Context used to create the source and destination buffers.</param>
        /// <param name="commandQueue">Queue on which the initial upload and the copies are enqueued.</param>
        /// <param name="memorySize">Size of each buffer, and of each copy, in bytes.</param>
        /// <returns>
        /// <c>2.0 * memorySize * MemoryCopyIterations / (elapsedSeconds * 2^20)</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
        private double TestDeviceToDeviceTransfer(Context context, CommandQueue commandQueue, int memorySize)
        {
            if (context == null)
            {
                throw new ArgumentNullException("context");
            }

            // Host staging data: every byte set to 0xFF.
            byte[] hostBytes = new byte[memorySize];
            for (int index = 0; index < hostBytes.Length; index++)
            {
                hostBytes[index] = 0xFF;
            }

            // Source and destination buffers for the copy.
            using (Buffer source = context.CreateBuffer(MemoryFlags.ReadOnly, memorySize))
            using (Buffer destination = context.CreateBuffer(MemoryFlags.WriteOnly, memorySize))
            {
                // Blocking upload of the staging data into the source buffer.
                unsafe
                {
                    fixed (byte* hostPointer = hostBytes)
                    {
                        using (commandQueue.EnqueueWriteBuffer(source, true, 0, memorySize, (IntPtr)hostPointer))
                        {
                        }
                    }
                }

                // Drain the queue so that only the copies below are timed.
                commandQueue.Finish();
                Stopwatch stopwatch = Stopwatch.StartNew();

                for (int pass = 0; pass < MemoryCopyIterations; pass++)
                {
                    using (commandQueue.EnqueueCopyBuffer(source, destination, 0, 0, memorySize))
                    {
                    }
                }

                // Wait for every enqueued copy to complete before reading the clock.
                commandQueue.Finish();
                double seconds = stopwatch.Elapsed.TotalSeconds;

                // The factor 2.0 counts each copy as both a read and a write of memorySize bytes;
                // a one-directional figure is half of the returned value.
                double bytesCopied = (double)memorySize * (double)MemoryCopyIterations;
                double bytesPerMegabyte = (double)(1 << 20);
                return 2.0 * bytesCopied / (seconds * bytesPerMegabyte);
            }
        }
开发者ID:gitter-badger,项目名称:NOpenCL,代码行数:53,代码来源:Bandwidth.cs

示例3: TestHostToDeviceTransferPaged

        /// <summary>
        /// Measures host-to-device copy throughput when the host-side source is an ordinary managed
        /// byte array that is fixed in place for the duration of the test.
        /// </summary>
        /// <param name="context">Context used to create the device buffer.</param>
        /// <param name="commandQueue">Queue on which every write, map and unmap command is enqueued.</param>
        /// <param name="memSize">Size of one transfer, in bytes.</param>
        /// <param name="accessMode">
        /// <c>AccessMode.Direct</c> copies with <c>EnqueueWriteBuffer</c>; any other value copies
        /// with <c>CopyMemory</c> through a mapped pointer to the device buffer.
        /// </param>
        /// <returns>
        /// <c>2.0 * memSize * MemoryCopyIterations / (elapsedSeconds * 2^20)</c>, i.e. a figure in MB/s
        /// that already includes the factor of two described in the comment above the formula.
        /// </returns>
        private unsafe double TestHostToDeviceTransferPaged(Context context, CommandQueue commandQueue, int memSize, AccessMode accessMode)
        {
            // standard host allocation, filled with a repeating 0..255 pattern; the truncation is
            // explicit so the loop does not throw OverflowException under /checked
            byte[] data = new byte[memSize];
            for (int i = 0; i < data.Length; i++)
                data[i] = unchecked((byte)i);

            fixed (byte* pdata = data)
            {
                // allocate device memory
                using (Buffer deviceData = context.CreateBuffer(MemoryFlags.ReadWrite, memSize))
                {
                    // sync queue to host
                    commandQueue.Finish();
                    Stopwatch timer = Stopwatch.StartNew();
                    if (accessMode == AccessMode.Direct)
                    {
                        // DIRECT: API access to device buffer
                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            // The object returned by EnqueueWriteBuffer is disposable; release it
                            // right away instead of leaking one per iteration.
                            using (commandQueue.EnqueueWriteBuffer(deviceData, false, 0, memSize, (IntPtr)pdata))
                            {
                            }
                        }

                        // the writes are non-blocking, so wait for all of them before stopping the clock
                        commandQueue.Finish();
                    }
                    else
                    {
                        // MAPPED: mapped pointers to device buffer for conventional pointer access
                        IntPtr dm_idata;
                        commandQueue.EnqueueMapBuffer(deviceData, true, MapFlags.Write, 0, memSize, out dm_idata);
                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            CopyMemory(dm_idata, (IntPtr)pdata, (UIntPtr)memSize);
                        }

                        commandQueue.EnqueueUnmapMemObject(deviceData, dm_idata);
                    }

                    // get the elapsed time in seconds
                    double elapsedTimeInSeconds = timer.Elapsed.TotalSeconds;

                    // Calculate bandwidth in MB/s
                    //      This is for kernels that read and write GMEM simultaneously
                    //      Obtained Throughput for unidirectional block copies will be 1/2 of this #
                    // NOTE(review): this is a one-directional host-to-device copy, so the 2.0 factor
                    // may overstate the result - confirm against the intended reporting convention.
                    double bandwidthInMBs = 2.0 * ((double)memSize * (double)MemoryCopyIterations) / (elapsedTimeInSeconds * (double)(1 << 20));

                    return bandwidthInMBs;
                }
            }
        }
开发者ID:gitter-badger,项目名称:NOpenCL,代码行数:50,代码来源:Bandwidth.cs

示例4: ECCTest

        /// <summary>
        /// Runs the "Test" kernel of the "ecc-p256" OpenCL program once on the first GPU device and
        /// prints the number of work items processed per second.
        /// </summary>
        /// <remarks>
        /// Every work item receives the point (x1, y1, z1) followed by the scalar s; the first word of
        /// s is incremented after each work item's input has been written. Only the first output
        /// point is read back, and it is not compared against anything in this method.
        /// </remarks>
        static void ECCTest()
        {
            const int BigIntBytes = 4 * 8;
            const int PointBytes = BigIntBytes * 3;
            const int InputWorkItemBytes = PointBytes + BigIntBytes;
            const int OutputWorkItemBytes = PointBytes;

            // Point uploaded to the device for every work item.
            uint[] x1 = {0x895aa032, 0x0d07522a, 0x506abf79, 0xabbc5c54, 0x1c2d6914, 0xb758abae, 0x914fa51b, 0xdfa23008};
            uint[] y1 = {0xefa18861, 0x602dfbbd, 0xe98d5b8c, 0xf884eb9e, 0x9898b025, 0x022e6bad, 0x31f238ee, 0x0bf40155};
            uint[] z1 = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};

            // Additional vectors; they are not referenced by the code below.
            uint[] x2 = {0xd6d7e35d, 0x2febd950, 0x2f987f4d, 0xb30482f7, 0x1164ce2e, 0xfce2b6ce, 0x12367d71, 0x15c1cdd1};
            uint[] y2 = {0xc1add051, 0x2dcfd682, 0x0d53b2d6, 0xbd9ad440, 0xad0f523b, 0x559ebb59, 0x45d34876, 0xdd307c87};
            uint[] z2 = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
            uint[] x3 = {0xb75c6254, 0x7b278510, 0xf45598f8, 0xdb81bb86, 0x4c48ee2b, 0x1dfe6ba4, 0xcbb54aa0, 0x616966b1};
            uint[] y3 = {0x356c3d49, 0x3c98aa53, 0xff99ca5b, 0x3d58a64f, 0xc0ac8b7e, 0x65168611, 0x0bb52f28, 0x9defd775};
            uint[] z3 = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};

            // Scalar uploaded after the point.
            uint[] s  = {0x9b2d206a, 0x8a022706, 0x5ce5a47a, 0x9f363b87, 0xcac90283, 0x2004790d, 0x1f2e5787, 0xadeba125};

            // Receive the first output point.
            uint[] x = new uint[8];
            uint[] y = new uint[8];
            uint[] z = new uint[8];

            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default))
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("ecc-p256"), context.Devices, null)) {
                // Work-group size is half the device's first-dimension limit; the global size is one
                // such group per compute unit.
                int maxWorkItemSize = (int)queue.Device.MaxWorkItemSizes[0] / 2;
                int parallels = (int)queue.Device.MaxComputeUnits * maxWorkItemSize;
                int local_size = maxWorkItemSize;
                while (local_size > parallels) {
                    local_size >>= 1;
                }

                using (Memory inMem = context.CreateBuffer (MemoryFlags.ReadOnly, InputWorkItemBytes * parallels))
                using (Memory outMem = context.CreateBuffer (MemoryFlags.WriteOnly, OutputWorkItemBytes * parallels))
                using (Kernel kernel = prog.CreateKernel ("Test")) {
                    kernel.SetArgument (0, inMem);
                    kernel.SetArgument (1, outMem);

                    // Lay out x1 | y1 | z1 | s back to back for every work item.
                    int inputOffset = 0;
                    for (int item = 0; item < parallels; item++) {
                        queue.WriteBuffer (inMem, inputOffset, x1, 0, BigIntBytes);
                        inputOffset += BigIntBytes;
                        queue.WriteBuffer (inMem, inputOffset, y1, 0, BigIntBytes);
                        inputOffset += BigIntBytes;
                        queue.WriteBuffer (inMem, inputOffset, z1, 0, BigIntBytes);
                        inputOffset += BigIntBytes;
                        queue.WriteBuffer (inMem, inputOffset, s, 0, BigIntBytes);
                        inputOffset += BigIntBytes;
                        s[0]++;
                    }

                    TimeSpan time = Execute (null, 1, 0, () => {
                        queue.Execute (kernel, 0, parallels, local_size);
                    });
                    double itemsPerSecond = parallels / time.TotalSeconds;
                    Console.WriteLine ("{0} mul/s", itemsPerSecond);

                    // Read back the three coordinates of the first output point.
                    int outputOffset = 0;
                    queue.ReadBuffer (outMem, outputOffset, x, 0, BigIntBytes);
                    outputOffset += BigIntBytes;
                    queue.ReadBuffer (outMem, outputOffset, y, 0, BigIntBytes);
                    outputOffset += BigIntBytes;
                    queue.ReadBuffer (outMem, outputOffset, z, 0, BigIntBytes);
                }
            }

            /*for (int i = 0; i < 8; i ++)
                Console.WriteLine ("x[{0}]=0x{1:x8}  y[{0}]=0x{2:x8}  z[{0}]=0x{3:x8}", i, x[i], y[i], z[i]);

            Console.WriteLine ("cmpl");
            Console.ReadLine ();*/
        }
开发者ID:kazuki,项目名称:oclcrypto,代码行数:64,代码来源:Program.cs

示例5: SHATest

        /// <summary>
        /// Benchmarks the "core256" kernel of the "sha-256" OpenCL program on the first GPU device,
        /// timing the input upload, the kernel run and the state read-back through the
        /// <c>Execute</c> helper and reporting the sum through <c>WriteTime</c>.
        /// </summary>
        /// <param name="parallels">
        /// The value passed in is not used: it is overwritten with the work-group size multiplied by
        /// the device's compute-unit count before its first read.
        /// </param>
        static void SHATest(int parallels)
        {
            const int memTests = 100;
            const int updateTests = 100;
            //int parallels;
            int blocks_per_instance = 1024;
            byte[] input, state, state_ref;
            TimeSpan total = TimeSpan.Zero;

            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default)) {
                // Half the device's first-dimension work-item limit per group, one group per compute unit.
                int local_size = (int)queue.Device.MaxWorkItemSizes[0] / 2;
                parallels = local_size * (int)queue.Device.MaxComputeUnits;

                input = new byte[SHA256.MessageSize * parallels * blocks_per_instance];
                state = new byte[parallels * SHA256.StateSize];
                state_ref = new byte[parallels * SHA256.StateSize];
                new Random ().NextBytes (input);

                // balance: only applies when there are fewer instances than one work group holds
                if (parallels < local_size) {
                    local_size = parallels / (int)queue.Device.MaxComputeUnits;
                    double exponent = Math.Ceiling (Math.Log (local_size, 2));
                    local_size = (int)Math.Pow (2, exponent);
                    if (local_size <= 0) {
                        local_size = 1;
                    }
                }

                using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("sha-256"), context.Devices, null))
                using (Memory inMem = context.CreateBuffer (MemoryFlags.ReadOnly, input.Length))
                using (Memory stateMem = context.CreateBuffer (MemoryFlags.ReadWrite, state.Length))
                using (Memory constMem = context.CreateBuffer (MemoryFlags.ReadOnly, 4 * SHA256.Constants.Length))
                using (Kernel kernel = prog.CreateKernel ("core256")) {
                    // Upload the constant table (4 bytes per entry).
                    queue.WriteBuffer (constMem, 0, SHA256.Constants, 0, SHA256.Constants.Length * 4);

                    // Give every instance its own copy of the initial hash values.
                    uint[] initialState = new uint[parallels * SHA256.StateSize / 4];
                    for (int instance = 0; instance < parallels; instance++) {
                        int firstWord = instance * SHA256.InitialValues.Length;
                        for (int word = 0; word < SHA256.InitialValues.Length; word++) {
                            initialState[firstWord + word] = SHA256.InitialValues[word];
                        }
                    }
                    queue.WriteBuffer (stateMem, 0, initialState, 0, initialState.Length * 4);

                    // Halve the work-group size until it fits both limits.
                    int global_size = parallels;
                    int max_local_size = local_size;
                    while (!(local_size <= global_size && local_size <= max_local_size)) {
                        local_size >>= 1;
                    }

                    // Kernel arguments: input, state, constants, local scratch, blocks per instance.
                    kernel.SetArgument (0, inMem);
                    kernel.SetArgument (1, stateMem);
                    kernel.SetArgument (2, constMem);
                    kernel.SetLocalDataShare (3, 4 * SHA256.Constants.Length);
                    kernel.SetArgument (4, blocks_per_instance, 4);

                    TimeSpan writeTime = Execute ("write", memTests, input.Length, () => {
                        queue.WriteBuffer (inMem, 0, input, 0, input.Length);
                    });
                    total += writeTime;

                    TimeSpan kernelTime = Execute ("kernel", updateTests, input.Length, () => {
                        queue.Execute (kernel, 0, global_size, local_size);
                    });
                    total += kernelTime;

                    TimeSpan readTime = Execute ("read", memTests, state.Length, () => {
                        queue.ReadBuffer (stateMem, 0, state, 0, state.Length);
                    });
                    total += readTime;

                    WriteTime ("total", total, input.Length);
                }
            }

            // Disabled comparison of the device result against the managed implementation.
            #if false
            SHA256.InitState (state_ref);
            for (int i = 0; i < blocks_per_instance; i ++) {
                SHA256.Update (input, i * SHA256.MessageSize * parallels, SHA256.MessageSize * parallels, state_ref);
            }
            for (int i = 0; i < state.Length; i++) {
                if (state[i] != state_ref[i]) {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine ("err");
                    Console.ForegroundColor = ConsoleColor.White;
                    break;
                }
            }
            #endif
            //Console.WriteLine ("cmpl");
            //Console.ReadLine ();
        }
开发者ID:kazuki,项目名称:oclcrypto,代码行数:86,代码来源:Program.cs

示例6: CamelliaTest2

        // Runs the "bitslice_camellia" OpenCL program on the first GPU device over 64 MiB of random
        // input, timing each stage through the Execute helper, then compares the device output with
        // Camellia.Encrypt and prints the first mismatching region, if any.
        static void CamelliaTest2()
        {
            const int memTests = 1;
            const int encryptTests = 1;
            const int ProcessUnitDataSize = 16 * 32; // 32bit-width bitslice
            byte[] key = new byte[16];
            byte[] input = new byte[ProcessUnitDataSize * 2 * 1024 * 64];
            byte[] output = new byte[input.Length];
            byte[] output_ref = new byte[input.Length];
            uint[] keyTable;
            // NOTE(review): two parameterless Random instances created back to back can share a
            // time-based seed on some runtimes, which would make key equal to the first 16 bytes
            // of input - consider using a single instance.
            new Random ().NextBytes (key);
            new Random ().NextBytes (input);
            // NOTE(review): keyTable is generated here but is not passed to any kernel in the
            // visible part of this method - confirm how the device code obtains the key.
            Camellia.GenerateKeyTable (key, out keyTable);
            TimeSpan total = TimeSpan.Zero;

            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default))
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("bitslice_camellia"), context.Devices, null))
            using (Memory mem = context.CreateBuffer (MemoryFlags.ReadWrite, input.Length)) {
                /*using (Memory keyMem = context.CreateBuffer (MemoryFlags.ReadWrite, expandedKey.Length * 32))
                using (Memory nonsliceKeyMem = context.CreateBuffer (MemoryFlags.WriteOnly, expandedKey.Length))
                using (Kernel kernel = prog.CreateKernel ("bitslice_key")) {
                    kernel.SetArgument (0, nonsliceKeyMem);
                    kernel.SetArgument (1, keyMem);
                    queue.WriteBuffer (nonsliceKeyMem, 0, expandedKey, 0, expandedKey.Length);
                    queue.Execute (kernel, 0, expandedKey.Length * 8 / 4, 8);
                }*/

                // Budget half of the device's reported local memory for the kernels below.
                int localMemorySize = (int)(queue.Device.LocalMemSize / 2);
                int maxWorkItemSize = (int)queue.Device.MaxWorkItemSizes[0];

                // global/local size setting for encrypt kernel
                // (one work item per ProcessUnitDataSize bytes; the group size is only capped by the
                // device limit and the global size)
                int global_size = input.Length / ProcessUnitDataSize;
                int local_size = int.MaxValue;// localMemorySize / 512;
                local_size = Math.Min (local_size, maxWorkItemSize);
                local_size = Math.Min (local_size, global_size);

                // global/local size setting for bitslice kernel
                // (32 work items per ProcessUnitDataSize bytes; the group size is also limited by the
                // local-memory budget at 512 bytes per 32 work items)
                int slice_global_size = input.Length / ProcessUnitDataSize * 32;
                int slice_local_size = (localMemorySize / 512) * 32;
                slice_local_size = Math.Min (slice_local_size, maxWorkItemSize);
                slice_local_size = Math.Min (slice_local_size, slice_global_size);

                // Upload the plaintext; the same buffer is transformed in place by every kernel.
                total += Execute ("write", memTests, input.Length, delegate () {
                    queue.WriteBuffer (mem, 0, input, 0, input.Length);
                });

                using (Kernel kernel_encrypt = prog.CreateKernel ("encrypt"))
                using (Kernel kernel_bitslice = prog.CreateKernel ("bitslice_kernel"))
                using (Kernel kernel_shuffle1 = prog.CreateKernel ("shuffle_state1"))
                using (Kernel kernel_shuffle2 = prog.CreateKernel ("shuffle_state2")) {
                    // Each slicing/shuffling kernel gets the data buffer plus 512 bytes of local
                    // memory per 32 work items of its work group.
                    kernel_bitslice.SetArgument (0, mem);
                    kernel_bitslice.SetLocalDataShare (1, 512 * slice_local_size / 32);
                    kernel_shuffle1.SetArgument (0, mem);
                    kernel_shuffle1.SetLocalDataShare (1, 512 * slice_local_size / 32);
                    kernel_shuffle2.SetArgument (0, mem);
                    kernel_shuffle2.SetLocalDataShare (1, 512 * slice_local_size / 32);
                    kernel_encrypt.SetArgument (0, mem);
                    //kernel_encrypt.SetLocalDataShare (1, 512 * local_size);

                    // Pipeline: bitslice -> shuffle1 -> encrypt -> shuffle2 -> bitslice again
                    // (the final stage reuses kernel_bitslice under the "unbitslice" label).
                    total += Execute ("kernel(bitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, slice_global_size, slice_local_size);
                    });
                    total += Execute ("kernel(shuffle)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_shuffle1, 0, slice_global_size, slice_local_size);
                    });
                    total += Execute ("kernel(encrypt)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_encrypt, 0, global_size, local_size);
                    });
                    total += Execute ("kernel(shuffle)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_shuffle2, 0, slice_global_size, slice_local_size);
                    });
                    total += Execute ("kernel(unbitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, slice_global_size, slice_local_size);
                    });
                }
                // Download the transformed buffer into output.
                total += Execute ("read", memTests, input.Length, delegate () {
                    queue.ReadBuffer (mem, 0, output, 0, output.Length);
                });
            }

            WriteTime ("total", total, input.Length);
            #if true
            // Compare against the managed implementation and dump up to 64 bytes starting at the
            // first mismatch, highlighting the differing rows in red.
            Camellia.Encrypt (key, input, output_ref);
            for (int i = 0; i < output.Length; i++)
                if (output[i] != output_ref[i]) {
                    ConsoleColor defColor = Console.ForegroundColor;
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine ("err");
                    Console.WriteLine ("  expected | actual | input");
                    for (int k = i; k < i + 64 && k < output.Length; k ++) {
                        Console.ForegroundColor = (output[k] == output_ref[k] ? defColor : ConsoleColor.Red);
                        Console.WriteLine ("    {0}: {1:x2} | {2:x2} | {3:x2}", k, output_ref[k], output[k], input[k]);
                    }
                    // Restore the colour that was active before the report.
                    Console.ForegroundColor = defColor;
                    break;
                }
            #endif

            Console.WriteLine ("cmpl");
//.........这里部分代码省略.........
开发者ID:kazuki,项目名称:oclcrypto,代码行数:101,代码来源:Program.cs

示例7: CamelliaTest

        /// <summary>
        /// Benchmarks the "encrypt1" kernel of the "camellia" OpenCL program on the first GPU device
        /// over 64 MiB of random input, timing the upload, the kernel run and the download through
        /// the <c>Execute</c> helper, then waits for a line on standard input.
        /// </summary>
        /// <remarks>
        /// The comparison against <c>Camellia.Encrypt</c> is compiled out (<c>#if false</c>).
        /// </remarks>
        static void CamelliaTest()
        {
            const int memTests = 1;
            const int encryptTests = 1;
            byte[] key = new byte[16];
            byte[] input = new byte[1024 * 1024 * 64];
            byte[] output = new byte[input.Length];
            byte[] output_ref = new byte[input.Length];
            uint[] keyTable;

            // Use ONE generator for both fills. Two parameterless Random instances created back to
            // back can receive the same time-based seed, in which case the key would be identical to
            // the first 16 bytes of the input.
            Random random = new Random ();
            random.NextBytes (key);
            random.NextBytes (input);
            Camellia.GenerateKeyTable (key, out keyTable);

            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default))
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("camellia"), context.Devices, null))
            using (Memory inMem = context.CreateBuffer (MemoryFlags.ReadOnly, input.Length))
            using (Memory outMem = context.CreateBuffer (MemoryFlags.WriteOnly, input.Length))
            using (Memory keyMem = context.CreateBuffer (MemoryFlags.ReadOnly, keyTable.Length * 4))
            using (Memory sbox1Mem = context.CreateBuffer (MemoryFlags.ReadOnly, Camellia.SBOX1_1110.Length * 4))
            using (Memory sbox2Mem = context.CreateBuffer (MemoryFlags.ReadOnly, Camellia.SBOX2_0222.Length * 4))
            using (Memory sbox3Mem = context.CreateBuffer (MemoryFlags.ReadOnly, Camellia.SBOX3_3033.Length * 4))
            using (Memory sbox4Mem = context.CreateBuffer (MemoryFlags.ReadOnly, Camellia.SBOX4_4404.Length * 4)) {
                TimeSpan total = TimeSpan.Zero;

                // Upload the key schedule and the four S-box tables (4 bytes per entry).
                queue.WriteBuffer (keyMem, 0, keyTable, 0, keyTable.Length * 4);
                queue.WriteBuffer (sbox1Mem, 0, Camellia.SBOX1_1110, 0, Camellia.SBOX1_1110.Length * 4);
                queue.WriteBuffer (sbox2Mem, 0, Camellia.SBOX2_0222, 0, Camellia.SBOX2_0222.Length * 4);
                queue.WriteBuffer (sbox3Mem, 0, Camellia.SBOX3_3033, 0, Camellia.SBOX3_3033.Length * 4);
                queue.WriteBuffer (sbox4Mem, 0, Camellia.SBOX4_4404, 0, Camellia.SBOX4_4404.Length * 4);

                // Zero-fill both data buffers before the timed section.
                queue.WriteBuffer (inMem, 0, new byte[inMem.Size], 0, (int)inMem.Size);
                queue.WriteBuffer (outMem, 0, new byte[outMem.Size], 0, (int)outMem.Size);

                // mode selects the kernel name ("encrypt" + mode); local_loops divides the global size.
                const int mode = 1;
                int local_loops = 4;
                if (mode == 2 && local_loops < 4) local_loops = 4;
                using (Kernel kernel = prog.CreateKernel ("encrypt" + mode.ToString ())) {
                    kernel.SetArgument (0, inMem);
                    kernel.SetArgument (1, outMem);
                    kernel.SetArgument (2, keyMem);
                    kernel.SetArgument (3, sbox1Mem);
                    kernel.SetArgument (4, sbox2Mem);
                    kernel.SetArgument (5, sbox3Mem);
                    kernel.SetArgument (6, sbox4Mem);
                    // Local-memory arguments sized like the corresponding S-box buffers.
                    kernel.SetLocalDataShare (7, (int)sbox1Mem.Size);
                    kernel.SetLocalDataShare (8, (int)sbox2Mem.Size);
                    kernel.SetLocalDataShare (9, (int)sbox3Mem.Size);
                    kernel.SetLocalDataShare (10, (int)sbox4Mem.Size);
                    // Number of 16-byte blocks in the input.
                    kernel.SetArgument (11, (uint)(input.Length / 16), 4);

                    total += Execute ("write", memTests, input.Length, delegate () {
                        queue.WriteBuffer (inMem, 0, input, 0, input.Length);
                    });

                    // One work item per local_loops blocks; halve the group size until it fits.
                    int global_size = input.Length / 16 / local_loops;
                    int local_size = (int)queue.Device.MaxWorkItemSizes[0];
                    while (local_size > global_size)
                        local_size >>= 1;

                    total += Execute ("kernel(encrypt)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel, 0, global_size, local_size);
                    });
                }

                total += Execute ("read", memTests, input.Length, delegate () {
                    queue.ReadBuffer (outMem, 0, output, 0, output.Length);
                });
                WriteTime ("total", total, input.Length);
            }
            #if false
            Camellia.Encrypt (key, input, output_ref);
            for (int i = 0; i < output.Length; i++)
                if (output[i] != output_ref[i]) {
                    // Restore whatever colour was active instead of forcing white.
                    ConsoleColor defColor = Console.ForegroundColor;
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine ("err");
                    Console.ForegroundColor = defColor;
                    break;
                }
            #endif

            Console.WriteLine ("cmpl");
            Console.ReadLine ();
        }
开发者ID:kazuki,项目名称:oclcrypto,代码行数:81,代码来源:Program.cs

示例8: AESTest2

        /// <summary>
        /// Runs a bitsliced AES OpenCL program ("bitslice_aes4" when <c>private_memory_mode</c> is
        /// true, otherwise "bitslice_aes3") on the first GPU device over 64 MiB of random input,
        /// timing each stage through the <c>Execute</c> helper, then compares the device output with
        /// <c>AES.Encrypt</c>, prints "err" on the first mismatch and waits for a line on standard
        /// input.
        /// </summary>
        static void AESTest2()
        {
            const int memTests = 1;
            const int encryptTests = 1;
            const int ProcessUnitDataSize = 16 * 32; // 32bit-width bitslice
            byte[] key = new byte[16];
            byte[] input = new byte[ProcessUnitDataSize * 2 * 1024 * 64];
            byte[] output = new byte[input.Length];
            byte[] output_ref = new byte[input.Length];
            byte[] expandedKey;

            // Use ONE generator for both fills. Two parameterless Random instances created back to
            // back can receive the same time-based seed, in which case the key would be identical to
            // the first 16 bytes of the input.
            Random random = new Random ();
            random.NextBytes (key);
            random.NextBytes (input);
            AES.KeyExpansion (key, out expandedKey);
            TimeSpan total = TimeSpan.Zero;

            bool private_memory_mode = true;

            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default))
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode (private_memory_mode ? "bitslice_aes4" : "bitslice_aes3"), context.Devices, null))
            using (Memory mem = context.CreateBuffer (MemoryFlags.ReadWrite, input.Length))
            using (Memory keyMem = context.CreateBuffer (MemoryFlags.ReadWrite, expandedKey.Length * 32)) {
                // Upload the expanded key and run "bitslice_key" with it as argument 0 and keyMem as
                // argument 1.
                // NOTE(review): nonsliceKeyMem is created WriteOnly yet is filled by the host and
                // handed to the kernel as its first argument - confirm the kernel never reads it,
                // otherwise the flag should allow kernel reads.
                using (Memory nonsliceKeyMem = context.CreateBuffer (MemoryFlags.WriteOnly, expandedKey.Length))
                using (Kernel kernel = prog.CreateKernel ("bitslice_key")) {
                    kernel.SetArgument (0, nonsliceKeyMem);
                    kernel.SetArgument (1, keyMem);
                    queue.WriteBuffer (nonsliceKeyMem, 0, expandedKey, 0, expandedKey.Length);
                    queue.Execute (kernel, 0, expandedKey.Length * 8 / 4, 8);
                }

                // Budget half of the device's reported local memory for the kernels below.
                int localMemorySize = (int)(queue.Device.LocalMemSize / 2);
                int maxWorkItemSize = (int)queue.Device.MaxWorkItemSizes[0];

                // global/local size setting for encrypt kernel
                int global_size = (private_memory_mode ? input.Length / ProcessUnitDataSize : input.Length / ProcessUnitDataSize * 4);
                int local_size = (private_memory_mode ? int.MaxValue : (localMemorySize / 512) * 4);
                local_size = Math.Min (local_size, maxWorkItemSize);
                local_size = Math.Min (local_size, global_size);

                // global/local size setting for bitslice kernel
                int slice_global_size = input.Length / ProcessUnitDataSize * 32;
                int slice_local_size = (localMemorySize / 512) * 32;
                slice_local_size = Math.Min (slice_local_size, maxWorkItemSize);
                slice_local_size = Math.Min (slice_local_size, slice_global_size);

                // Upload the plaintext; the same buffer is transformed in place by every kernel.
                total += Execute ("write", memTests, input.Length, delegate () {
                    queue.WriteBuffer (mem, 0, input, 0, input.Length);
                });

                using (Kernel kernel_encrypt = prog.CreateKernel ("encrypt"))
                using (Kernel kernel_bitslice = prog.CreateKernel ("bitslice_kernel")) {
                    kernel_bitslice.SetArgument (0, mem);
                    kernel_bitslice.SetLocalDataShare (1, 512 * slice_local_size / 32);
                    kernel_encrypt.SetArgument (0, mem);
                    kernel_encrypt.SetArgument (1, keyMem);
                    // Only the non-private variant takes a local-memory argument.
                    if (!private_memory_mode)
                        kernel_encrypt.SetLocalDataShare (2, 512 * local_size / 4);

                    // Pipeline: bitslice -> encrypt -> bitslice again (reused under the
                    // "unbitslice" label).
                    total += Execute ("kernel(bitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, slice_global_size, slice_local_size);
                    });
                    total += Execute ("kernel(encrypt)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_encrypt, 0, global_size, local_size);
                    });
                    total += Execute ("kernel(unbitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, slice_global_size, slice_local_size);
                    });
                }
                total += Execute ("read", memTests, input.Length, delegate () {
                    queue.ReadBuffer (mem, 0, output, 0, output.Length);
                });
            }

            WriteTime ("total", total, input.Length);
            #if true
            // Compare against the managed implementation and report the first mismatch.
            AES.Encrypt (key, input, output_ref);
            for (int i = 0; i < output.Length; i++)
                if (output[i] != output_ref[i]) {
                    // Restore whatever colour was active instead of forcing white.
                    ConsoleColor defColor = Console.ForegroundColor;
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine ("err");
                    Console.ForegroundColor = defColor;
                    break;
                }
            #endif

            Console.WriteLine ("cmpl");
            Console.ReadLine ();
        }
开发者ID:kazuki,项目名称:oclcrypto,代码行数:88,代码来源:Program.cs

示例9: AESTest

        // Benchmark driver: encrypts a 64 MiB random buffer with AES on the first GPU
        // device of a new OpenCL context and accumulates the time of each stage
        // (host->device write, kernel launches, device->host read) into `total`.
        // Alternative kernel programs are selected at compile time with #if/#elif;
        // the two variants visible here ("bitslice_aes2" and "bitslice_aes") are both
        // disabled by `false` conditions.
        // NOTE(review): `Execute (label, count, length, delegate)` is a helper defined
        // elsewhere; its result is added to a TimeSpan, so it presumably runs the
        // delegate `count` times and returns the elapsed time - confirm.
        static void AESTest()
        {
            // Repeat counts handed to Execute for the memory-transfer and kernel stages.
            const int memTests = 1;
            const int encryptTests = 1;
            byte[] key = new byte[16];
            // 64 MiB of data to encrypt.
            byte[] input = new byte[1024 * 1024 * 64];
            byte[] output = new byte[input.Length];
            // Same size as the device output; presumably holds a CPU reference result
            // for comparison - not used in the part of the method visible here.
            byte[] output_ref = new byte[input.Length];
            byte[] expandedKey;
            // NOTE(review): two separate parameterless Random instances are created
            // back to back. On runtimes that seed Random() from the clock they can
            // yield the same sequence, making `key` equal to the first 16 bytes of
            // `input` - confirm whether that matters for this benchmark.
            new Random ().NextBytes (key);
            new Random ().NextBytes (input);
            AES.KeyExpansion (key, out expandedKey);
            TimeSpan total = TimeSpan.Zero;

            // Context restricted to GPU devices; all commands go to the first device.
            using (Context context = new Context (DeviceType.GPU))
            using (CommandQueue queue = context.CreateCommandQueue (context.Devices[0], CommandQueueProperties.Default))
            // ---- Variant 1 (disabled): program "bitslice_aes2" ----
            #if false
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("bitslice_aes2"), context.Devices, null))
            using (Memory mem = context.CreateBuffer (MemoryFlags.ReadWrite, input.Length))
            // Device key buffer is 32x the size of the expanded key.
            using (Memory keyMem = context.CreateBuffer (MemoryFlags.ReadWrite, expandedKey.Length * 32)) {

                // Upload the expanded key and run "bitslice_key" with it as argument 0
                // and keyMem as argument 1.
                // NOTE(review): this staging buffer is created WriteOnly here, while the
                // "bitslice_aes" variant below creates the equivalent buffer ReadOnly -
                // confirm which flag is intended.
                using (Memory nonsliceKeyMem = context.CreateBuffer (MemoryFlags.WriteOnly, expandedKey.Length))
                using (Kernel kernel = prog.CreateKernel ("bitslice_key")) {
                    kernel.SetArgument (0, nonsliceKeyMem);
                    kernel.SetArgument (1, keyMem);
                    queue.WriteBuffer (nonsliceKeyMem, 0, expandedKey, 0, expandedKey.Length);
                    queue.Execute (kernel, 0, expandedKey.Length * 8 / 4, 8);
                }

                // One work-item per 512 bytes of input (presumably 32 blocks of 16 bytes
                // each - confirm against the kernel source).
                int global_size = input.Length / (16 * 32);
                // Work-group size derived from the device's local memory size, then
                // halved until it no longer exceeds the global size.
                int local_size = (int)queue.Device.LocalMemSize / 512 / 2;
                while (local_size > global_size)
                    local_size >>= 1;

                // Timed host->device upload of the input.
                total += Execute ("write", memTests, input.Length, delegate () {
                    queue.WriteBuffer (mem, 0, input, 0, input.Length);
                });
            // Nested switch: "encrypt1" path (disabled) vs. "encrypt2" + "bitslice_kernel" path.
            #if false
                using (Kernel kernel_encrypt = prog.CreateKernel ("encrypt1")) {
                    kernel_encrypt.SetArgument (0, mem);
                    kernel_encrypt.SetArgument (1, keyMem);
                    kernel_encrypt.SetLocalDataShare (2, 512 * local_size);

                    total += Execute ("kernel(encrypt)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_encrypt, 0, global_size, local_size);
                    });
                }
            #else
                using (Kernel kernel_encrypt2 = prog.CreateKernel ("encrypt2"))
                using (Kernel kernel_bitslice = prog.CreateKernel ("bitslice_kernel")) {
                    kernel_bitslice.SetArgument (0, mem);
                    kernel_encrypt2.SetArgument (0, mem);
                    kernel_encrypt2.SetArgument (1, keyMem);
                    // Argument 2 of encrypt2 is a local data share of 512 * local_size.
                    kernel_encrypt2.SetLocalDataShare (2, 512 * local_size);

            // Innermost switch: three separately timed launches vs. one timed chain.
            #if true
                    // The same bitslice_kernel is launched before and after encrypt2;
                    // the second launch is labelled "unbitslice".
                    total += Execute ("kernel(bitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, global_size, local_size);
                    });
                    total += Execute ("kernel(encrypt2)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_encrypt2, 0, global_size, local_size);
                    });
                    total += Execute ("kernel(unbitslice)", encryptTests, input.Length, delegate () {
                        queue.Execute (kernel_bitslice, 0, global_size, local_size);
                    });
            #else
                    // Alternative: time the three launches as one unit, chaining them
                    // through event handles (each launch is given the previous
                    // launch's event).
                    total += Execute ("kernel", encryptTests, input.Length, delegate () {
                        EventHandle bitslice_wait, encrypt_wait;
                        queue.ExecuteAsync (kernel_bitslice, 0, global_size, local_size, out bitslice_wait);
                        queue.ExecuteAsync (kernel_encrypt2, 0, global_size, local_size, new EventHandle[] { bitslice_wait }, out encrypt_wait);
                        queue.Execute (kernel_bitslice, 0, global_size, local_size, new EventHandle[] { encrypt_wait });
                    });
            #endif
                }
            #endif
                // Timed device->host download of the result.
                total += Execute ("read", memTests, input.Length, delegate () {
                    queue.ReadBuffer (mem, 0, output, 0, output.Length);
                });
            }
            // ---- Variant 2 (disabled): program "bitslice_aes" ----
            #elif false
            using (CLProgram prog = context.CreateProgram (OclCodeStore.GetOclCode ("bitslice_aes"), context.Devices, null))
            using (Memory inMem = context.CreateBuffer (MemoryFlags.ReadWrite, input.Length))
            using (Memory keyMem = context.CreateBuffer (MemoryFlags.ReadWrite, expandedKey.Length * 32)) {

                // Upload the expanded key and run "bitslice_key" to fill keyMem.
                using (Memory nonsliceKeyMem = context.CreateBuffer (MemoryFlags.ReadOnly, expandedKey.Length))
                using (Kernel kernel = prog.CreateKernel ("bitslice_key")) {
                    kernel.SetArgument (0, nonsliceKeyMem);
                    kernel.SetArgument (1, keyMem);
                    queue.WriteBuffer (nonsliceKeyMem, 0, expandedKey, 0, expandedKey.Length);
                    queue.Execute (kernel, 0, expandedKey.Length * 8, 128);
                }

                // One work-item per 16 bytes of input, fixed work-group size of 128.
                int global_size = input.Length / 16;
                int local_size = 128;

                // Timed host->device upload of the input.
                total += Execute ("write", memTests, input.Length, delegate () {
                    queue.WriteBuffer (inMem, 0, input, 0, input.Length);
                });

                using (Kernel kernel = prog.CreateKernel ("encrypt")) {
//.........这里部分代码省略.........
开发者ID:kazuki,项目名称:oclcrypto,代码行数:101,代码来源:Program.cs

示例10: ExecuteKernel

        // Runs `kernel` over `input` once (after an optional untimed warm-up launch),
        // measuring the launch with a Stopwatch and, optionally, with the event's
        // profiling timestamps, and then measuring how long it takes to make the
        // results visible in `output`.
        //
        // Parameters:
        //   context         - used to create the input and output buffers.
        //   device          - queried for the kernel's maximum work-group size.
        //   commandQueue    - queue on which the kernel and transfers are enqueued.
        //   kernel          - kernel to run; argument 0 is set to the input buffer and
        //                     argument 1 to the output buffer.
        //   input / output  - host arrays; the buffers are sized from input.Length floats.
        //   globalWorkSize  - global work size of the 1-D launch.
        //   localWorkSize   - local work size; ignored when autoGroupSize is true.
        //   warming         - when true, performs one extra launch before timing starts.
        //   useHostPointer  - true: buffers are created with UseHostPointer and the
        //                     result is obtained by mapping; false: CopyHostPointer and
        //                     an explicit read-back.
        //   autoGroupSize   - when true, null is passed as the local work size.
        //   enableProfiling - when true, profiledTime is computed from the event's
        //                     start/end timestamps; otherwise it is TimeSpan.Zero.
        //   stopwatchTime   - wall-clock time from enqueue until the event completed.
        //   profiledTime    - see enableProfiling.
        //   readTime        - wall-clock time of the map/unmap or read-back phase.
        private unsafe void ExecuteKernel(
            Context context,
            Device device,
            CommandQueue commandQueue,
            Kernel kernel,
            float[] input,
            float[] output,
            int globalWorkSize,
            int localWorkSize,
            bool warming,
            bool useHostPointer,
            bool autoGroupSize,
            bool enableProfiling,
            out TimeSpan stopwatchTime,
            out TimeSpan profiledTime,
            out TimeSpan readTime)
        {
            // Both buffers share the host-pointer mode; the input is ReadOnly and the
            // output is ReadWrite.
            MemoryFlags inFlags = (useHostPointer ? MemoryFlags.UseHostPointer : MemoryFlags.CopyHostPointer) | MemoryFlags.ReadOnly;
            MemoryFlags outFlags = (useHostPointer ? MemoryFlags.UseHostPointer : MemoryFlags.CopyHostPointer) | MemoryFlags.ReadWrite;

            int taskSize = input.Length;

            // Pin both arrays so their addresses can be passed to CreateBuffer and stay
            // valid for the lifetime of the buffers created below.
            fixed (float* pinput = input, poutput = output)
            {
                // Allocate the buffers; both are taskSize floats long.
                using (Buffer inputBuffer = context.CreateBuffer(inFlags, sizeof(float) * taskSize, (IntPtr)pinput),
                    outputBuffer = context.CreateBuffer(outFlags, sizeof(float) * taskSize, (IntPtr)poutput))
                {
                    kernel.Arguments[0].SetValue(inputBuffer);
                    kernel.Arguments[1].SetValue(outputBuffer);

                    Console.WriteLine("Original global work size {0}", globalWorkSize);
                    Console.WriteLine("Original local work size {0}", localWorkSize);
                    if (autoGroupSize)
                    {
                        Console.WriteLine("Run-time determines optimal workgroup size");
                    }

                    // Reported for information only; the value is not used to adjust
                    // localWorkSize in the code visible here.
                    IntPtr workGroupSizeMaximum = kernel.GetWorkGroupSize(device);
                    Console.WriteLine("Maximum workgroup size for this kernel  {0}", workGroupSizeMaximum.ToInt64());

                    if (warming)
                    {
                        // Untimed launch with the same sizes as the measured one; the
                        // returned event is disposed immediately and the queue drained.
                        Console.Write("Warming up OpenCL execution...");
                        using (commandQueue.EnqueueNDRangeKernel(kernel, new[] { (IntPtr)globalWorkSize }, autoGroupSize ? null : new[] { (IntPtr)localWorkSize }))
                        {
                        }

                        commandQueue.Finish();
                        Console.WriteLine("Done");
                    }

                    Console.Write("Executing OpenCL kernel...");
                    Stopwatch timer = Stopwatch.StartNew();

                    // Measured launch. With autoGroupSize the local work size is null,
                    // leaving the choice of work-group size to the OpenCL runtime.
                    using (Event perfEvent = commandQueue.EnqueueNDRangeKernel(kernel, new[] { (IntPtr)globalWorkSize }, autoGroupSize ? null : new[] { (IntPtr)localWorkSize }))
                    {
                        // Wait for the launch to complete before sampling the stopwatch.
                        Event.WaitAll(perfEvent);
                        stopwatchTime = timer.Elapsed;

                        Console.WriteLine("Done");

                        if (enableProfiling)
                        {
                            ulong start = perfEvent.CommandStartTime;
                            ulong end = perfEvent.CommandEndTime;

                            // A TimeSpan tick is 100 ns, hence the division by 100.
                            // assumes the event timestamps are in nanoseconds - TODO confirm
                            profiledTime = TimeSpan.FromTicks((long)(end - start) / 100);
                        }
                        else
                        {
                            profiledTime = TimeSpan.Zero;
                        }
                    }

                    // Second measurement: time to make the results visible to the host.
                    timer.Restart();
                    if (useHostPointer)
                    {
                        // Map the output buffer for reading (the `true` argument
                        // presumably requests a blocking map - confirm), check that the
                        // mapping is the pinned host array itself, then unmap.
                        IntPtr tmpPtr;
                        using (commandQueue.EnqueueMapBuffer(outputBuffer, true, MapFlags.Read, 0, sizeof(float) * taskSize, out tmpPtr))
                        {
                        }

                        Assert.AreEqual((IntPtr)poutput, tmpPtr, "EnqueueMapBuffer failed to return original pointer");
                        using (commandQueue.EnqueueUnmapMemObject(outputBuffer, tmpPtr))
                        {
                        }
                    }
                    else
                    {
                        // Copy mode: read the whole output buffer back into `output`.
                        using (commandQueue.EnqueueReadBuffer(outputBuffer, true, 0, sizeof(float) * taskSize, (IntPtr)poutput))
                        {
                        }
                    }

                    // Drain the queue so the unmap/read is included in readTime.
                    commandQueue.Finish();
                    readTime = timer.Elapsed;
                }
//.........这里部分代码省略.........
开发者ID:sharwell,项目名称:NOpenCL,代码行数:101,代码来源:Optimization.cs


注:本文中的Context.CreateBuffer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。