本文整理汇总了C#中ComputeCommandQueue.Finish方法的典型用法代码示例。如果您正苦于以下问题：C# ComputeCommandQueue.Finish方法的具体用法？C# ComputeCommandQueue.Finish怎么用？C# ComputeCommandQueue.Finish使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类ComputeCommandQueue的用法示例。
在下文中一共展示了ComputeCommandQueue.Finish方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: RunInternal
/// <summary>
/// Creates a device buffer from six random 64-bit values, maps it into host
/// memory with a non-blocking map, prints the mapped values and unmaps it.
/// </summary>
protected override void RunInternal()
{
    // The queue and the buffer wrap native OpenCL handles, so they are released
    // deterministically instead of being left to the finalizer.
    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.Profiling))
    {
        Console.WriteLine("Original content:");

        Random rand = new Random();
        int count = 6;
        long[] bufferContent = new long[count];
        for (int i = 0; i < count; i++)
        {
            bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
            Console.WriteLine("\t" + bufferContent[i]);
        }

        using (ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent))
        {
            // The map is requested as non-blocking (second argument is false), so the
            // pointer must not be dereferenced before the queue has been drained.
            IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
            commands.Finish();

            Console.WriteLine("Mapped content:");
            for (int i = 0; i < bufferContent.Length; i++)
            {
                // Read the i-th element at its byte offset from the mapped base address.
                Console.WriteLine("\t" + Marshal.ReadInt64(mappedPtr, i * sizeof(long)));
            }

            commands.Unmap(buffer, ref mappedPtr, null);

            // Wait for the enqueued unmap to happen before the buffer is released.
            commands.Finish();
        }
    }
}
示例2: ConductSearch
/// <summary>
/// Processes all pending queen tasks in batches: each batch is uploaded to a device
/// buffer, the kernel is enqueued twelve times over it, and the results are read
/// back before the next batch is fetched. Prints the elapsed time and the total
/// number of solutions.
/// </summary>
/// <param name="context">OpenCL context; its second device (index 1) runs the kernel.</param>
/// <param name="kernel">Kernel whose argument 0 receives the task buffer.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();

    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

        var sw = Stopwatch.StartNew();

        while (inProgress.Any())
        {
            // A fresh buffer is needed per batch; dispose it at the end of the
            // iteration so device memory does not pile up across batches.
            using (var taskBuffer =
                new ComputeBuffer<QueenTask>(context,
                    ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                    inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);

                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);

                // The read above is non-blocking; drain the queue before touching inProgress.
                commands.Finish();
            }

            // Every event of this batch has completed; release them so the wait
            // list does not keep growing from batch to batch.
            foreach (ComputeEventBase completedEvent in eventList)
            {
                completedEvent.Dispose();
            }
            eventList.Clear();

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

        // Seeded aggregate: yields 0 instead of throwing when no task was completed.
        ulong sum = done.Select(state => state.solutions)
                        .Aggregate(0UL, (total, next) => total + next);

        Console.WriteLine("Q({0})={1}", NumQueens, sum);
    }
}
示例3: Run
/// <summary>
/// Creates a device buffer from six random 64-bit values, maps it into host memory
/// with a blocking map, writes the mapped values to <paramref name="log"/> and unmaps it.
/// Any exception is written to the log instead of being propagated.
/// </summary>
/// <param name="context">OpenCL context; its first device runs the commands.</param>
/// <param name="log">Receives the original values, the mapped values and any error text.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // "using" guarantees the queue and the buffer are released even when a
        // command throws part-way through.
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            log.WriteLine("Original content:");

            Random rand = new Random();
            int count = 6;
            long[] bufferContent = new long[count];
            for (int i = 0; i < count; i++)
            {
                bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                log.WriteLine("\t" + bufferContent[i]);
            }

            using (ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent))
            {
                // Blocking map (second argument is true): the pointer is usable on return.
                IntPtr mappedPtr = commands.Map(buffer, true, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);

                log.WriteLine("Mapped content:");
                for (int i = 0; i < bufferContent.Length; i++)
                {
                    log.WriteLine("\t" + Marshal.ReadInt64(mappedPtr, i * sizeof(long)));
                }

                commands.Unmap(buffer, ref mappedPtr, null);

                // wait for the unmap to happen
                commands.Finish();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
示例4: Run
/// <summary>
/// "Dummy test": creates a device buffer from six random 64-bit values, maps it with
/// a non-blocking map, writes the mapped values to <paramref name="log"/> and unmaps it.
/// Any exception is written to the log instead of being propagated.
/// </summary>
/// <param name="log">Receives the test banner, the values and any error text.</param>
/// <param name="context">OpenCL context; its first device runs the commands.</param>
public static void Run(TextWriter log, ComputeContext context)
{
    StartTest(log, "Dummy test");
    try
    {
        // Release the native queue and buffer deterministically, also on failure.
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            log.WriteLine("Original content:");

            Random rand = new Random();
            int count = 6;
            long[] bufferContent = new long[count];
            for (int i = 0; i < count; i++)
            {
                bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                log.WriteLine("\t" + bufferContent[i]);
            }

            using (ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent))
            {
                // Non-blocking map: drain the queue before dereferencing the pointer.
                IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
                commands.Finish();

                log.WriteLine("Mapped content:");
                for (int i = 0; i < bufferContent.Length; i++)
                {
                    log.WriteLine("\t" + Marshal.ReadInt64(mappedPtr, i * sizeof(long)));
                }

                commands.Unmap(buffer, ref mappedPtr, null);

                // Wait for the enqueued unmap to happen before the buffer is released.
                commands.Finish();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
    EndTest(log, "Dummy test");
}
示例5: notify
/// <summary>
/// Runs the "test" kernel once on a temporary queue and reads its 16-element
/// output buffer back into a local array.
/// </summary>
/// <param name="programHandle">Not used by this method.</param>
/// <param name="userDataPtr">Not used by this method.</param>
private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
{
    uint[] dst = new uint[16];
    fixed (uint* dstPtr = dst)
    {
        // Queue, buffer and kernel all wrap native handles; dispose every one of them.
        using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
        using (var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, 16))
        using (var kernel = program.CreateKernel("test"))
        {
            kernel.SetValueArgument(0, 1443351125U);
            kernel.SetMemoryArgument(1, buf);

            queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
            queue.Finish();

            // Blocking read straight into the pinned destination array.
            queue.Read<uint>(buf, true, 0, 16, (IntPtr)dstPtr, null);
            queue.Finish();
        }
    }
}
示例6: RunInternal
/// <summary>
/// Adds two arrays of ten random floats on the device with the "VectorAdd" kernel,
/// reads the result into a pinned host array and prints every sum.
/// </summary>
protected override void RunInternal()
{
    int count = 10;
    float[] arrA = new float[count];
    float[] arrB = new float[count];
    float[] arrC = new float[count];

    Random rand = new Random();
    for (int i = 0; i < count; i++)
    {
        arrA[i] = (float)(rand.NextDouble() * 100);
        arrB[i] = (float)(rand.NextDouble() * 100);
    }

    // Every OpenCL object below wraps a native handle, so each one is disposed.
    using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
    using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
    using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
    using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeEventList events = new ComputeEventList();

            // Pin the destination so its address stays valid during the non-blocking read.
            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
            try
            {
                commands.Execute(kernel, null, new long[] { count }, null, events);
                commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);

                // The read is non-blocking; wait for it before unpinning and using arrC.
                commands.Finish();
            }
            finally
            {
                // Unpin even when a command throws, otherwise the array stays pinned.
                arrCHandle.Free();

                foreach (ComputeEventBase completedEvent in events)
                {
                    completedEvent.Dispose();
                }
                events.Clear();
            }
        }
    }

    for (int i = 0; i < count; i++)
    {
        Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
    }
}
示例7: CalculateConvolution
private void CalculateConvolution(ComputeContext computeContext)
{
Stopwatch stopwatch = new Stopwatch();
stopwatch.Start();
float dx;
bool shiftXParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx);
if (!shiftXParse)
throw new SyntaxErrorException(", needs to be .");
float dy;
bool shiftYParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy);
if (!shiftYParse)
throw new SyntaxErrorException(", needs to be .");
float dz;
bool shiftZParse = float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz);
if (!shiftZParse)
throw new SyntaxErrorException(", needs to be .");
int pixelCount = _imageDimensionX*_imageDimensionY*_imageDimensionZ;
Console.WriteLine("Computing...");
Console.WriteLine("Reading kernel...");
String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;
String kernelString;
using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
kernelString = sr.ReadToEnd();
Console.WriteLine("Reading kernel... done");
float[] selectedTransformation = Transformations.GetTransformation((TransformationType)comboBoxTransform.SelectedItem, 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), 1.0f / float.Parse(textBoxPixelSize.Text), dx, dy, dz);
//create openCL program
ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString);
computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);
ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
Console.WriteLine("computeProgramBuildStatus\n\t"+computeProgramBuildStatus);
String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
Console.WriteLine("buildLog");
if (buildLog.Equals("\n"))
Console.WriteLine("\tbuildLog is empty...");
else
Console.WriteLine("\t" + buildLog);
float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);
/////////////////////////////////////////////
// Create a Command Queue & Event List
/////////////////////////////////////////////
ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None);
////////////////////////////////////////////////////////////////
// Create Buffers Transform
////////////////////////////////////////////////////////////////
ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength);
ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength);
/////////////////////////////////////////////
// Create the transformFluorophoresKernel
///////////////////////////////////////////////////////////
ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores");
/////////////////////////////////////////////
// Set the transformFluorophoresKernel arguments
/////////////////////////////////////////////
transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);
/////////////////////////////////////////////
// Configure the work-item structure
/////////////////////////////////////////////
long[] globalWorkOffsetTransformFluorophoresKernel = null;
long[] globalWorkSizeTransformFluorophoresKernel = new long[] { fluorophores.Length / 4 };
long[] localWorkSizeTransformFluorophoresKernel = null;
////////////////////////////////////////////////////////
// Enqueue the transformFluorophoresKernel for execution
////////////////////////////////////////////////////////
computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);
computeCommandQueue.Execute(transformFluorophoresKernel, globalWorkOffsetTransformFluorophoresKernel, globalWorkSizeTransformFluorophoresKernel, localWorkSizeTransformFluorophoresKernel, null);
// computeCommandQueue.ExecuteTask(transformFluorophoresKernel, transformFluorophoresEvents);
float[] transformedFluorophores = new float[fluorophores.Length];
computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);
computeCommandQueue.Finish();
//TODO remove, only for testing
// for (int i = 0; i < transformedFluorophores.Length; i++)
//.........这里部分代码省略.........
示例8: Test
//.........这里部分代码省略.........
}
ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context,
ComputeMemoryFlags.UseHostPointer, inMatrixA);
ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context,
ComputeMemoryFlags.UseHostPointer, inMatrixB);
ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context,
ComputeMemoryFlags.UseHostPointer, outMatrixC);
long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);
//Sets arguments
kernel.SetMemoryArgument(0, bufferMatrixA);
kernel.SetMemoryArgument(1, bufferMatrixB);
kernel.SetMemoryArgument(2, bufferMatrixC);
kernel.SetLocalArgument(3, sideSize * 2);
kernel.SetValueArgument<int>(4, sideSize);
//kernel.SetLocalArgument(1, localWorkSize);
string offset = " ";
for (int x = 0; x < sideSize; x++)
offset += " ";
if (sideSize <= 32)
for (int y = 0; y < sideSize; y++)
{
Console.Write(offset);
for (int x = 0; x < sideSize; x++)
Console.Write(inMatrixA[y * sideSize + x] + " ");
Console.WriteLine();
}
//Runs commands
ComputeCommandQueue commands = new ComputeCommandQueue(context,
context.Devices[0], ComputeCommandQueueFlags.None);
long executionTime = DateTime.Now.Ticks;
//Execute kernel
//globalWorkSize in increments of localWorkSize (max of device.MaxComputeUnits or kernel.GetWorkGroupSize())
commands.Execute(kernel, null,
new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
new long[] { localWorkSize, 1 }, null);
//globalWorkSize can be any size
//localWorkSize product much not be greater than device.MaxComputeUnits
//and it must not be greater than kernel.GetWorkGroupSize()
//ESSENTIALLY, the program iterates through globalWorkSize
//in increments of localWorkSize. Both are multidimensional,
//but this just saves us the time of doing that
//(1 dimension can be put to multiple if the max dimension lengths
//are known very easily with remainder).
//Also, you should probably use this
//kernel.GetPreferredWorkGroupSizeMultiple(device);
commands.Finish();
commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);
commands.Finish();
executionTime = DateTime.Now.Ticks - executionTime;
GC.Collect();
program.Dispose();
Console.WriteLine();
if (sideSize <= 32)
for (int y = 0; y < sideSize; y++)
{
for (int x = 0; x < sideSize; x++)
Console.Write(inMatrixB[y * sideSize + x] + " ");
Console.Write(" ");
for (int x = 0; x < sideSize; x++)
Console.Write(outMatrixC[y * sideSize + x] + " ");
Console.WriteLine();
}
int testY = random.Next(sideSize);
int testX = random.Next(sideSize);
int sum = 0;
for (int q = 0; q < sideSize; q++)
sum += inMatrixA[q * sideSize + testX] *
inMatrixB[testY * sideSize + q];
Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);
Console.WriteLine(executionTime / 10000.0);
}
示例9: ComputerCL
/// <summary>
/// Creates the OpenCL-based computation program.
/// </summary>
/// <param name="maxDt">Initial time step.</param>
/// <param name="a">Amplitude.</param>
/// <param name="omega">Angular velocity.</param>
public ComputerCL(double maxDt, double a, double omega)
    : base(maxDt, a, omega)
{
    // Take the first platform and all of its devices.
    this.Platform = ComputePlatform.Platforms[0];
    this.Devices = this.Platform.Devices;

    // Create the context.
    var context = new ComputeContext(this.Devices, new ComputeContextPropertyList(this.Platform), null, IntPtr.Zero);

    // Create the command queue on the first device.
    this.queue = new ComputeCommandQueue(context, this.Devices[0], ComputeCommandQueueFlags.None);

    // Create the program from the embedded source.
    var program = new ComputeProgram(context, Properties.Resources.SinAcceleration);

    // Try to build it.
    try
    {
        program.Build(this.Devices, null, null, IntPtr.Zero);
    }
    // The caught exception object itself is not needed: the source and the build log
    // are what gets reported.
    catch (BuildProgramFailureComputeException)
    {
        throw new BuildCLException(program.Source[0], program.GetBuildLog(this.Devices[0]));
    }

    // Create the kernel.
    this.sinAccelerationKernel = program.CreateKernel("SinAcceleration");

    // Until a particle is added, the prepare step does nothing.
    this.prepare = () => { };

    // Whenever a particle is added...
    base.ParticleAdded += (sender, e) =>
    {
        // ...install the real prepare step.
        this.prepare = () =>
        {
            // Record the particle count.
            this.particleCount = this.inputParticles.Count;

            // Release the buffers of a previous prepare run before replacing them,
            // so device memory is not left waiting for the finalizer.
            if (this.bufferX != null)
            {
                this.bufferX.Dispose();
            }
            if (this.bufferU != null)
            {
                this.bufferU.Dispose();
            }
            if (this.bufferA != null)
            {
                this.bufferA.Dispose();
            }
            if (this.bufferD != null)
            {
                this.bufferD.Dispose();
            }

            // Create the device buffers.
            this.bufferX = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferU = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferA = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferD = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly, this.particleCount);

            // Allocate the host-side input data.
            var particlesX = new Vector4[this.particleCount];
            var particlesU = new Vector4[this.particleCount];
            var particlesA = new Vector4[this.particleCount];
            this.particlesD = new float[this.particleCount];
            this.particlesMaterial = new Material[this.particleCount];
            this.particlesType = new ParticleType[this.particleCount];

            // Copy the data of every particle.
            int i = 0;
            foreach(var particle in this.inputParticles)
            {
                particlesX[i] = new Vector4((Vector3)particle.X, 0);
                particlesU[i] = new Vector4((Vector3)particle.U, 0);
                particlesA[i] = new Vector4((Vector3)particle.A, 0);
                this.particlesD[i] = (float)particle.D;
                this.particlesMaterial[i] = particle.Material;
                this.particlesType[i] = particle.Type;

                i++;
            }

            // Transfer to the device buffers (non-blocking writes).
            this.queue.WriteToBuffer(particlesX, this.bufferX, false, null);
            this.queue.WriteToBuffer(particlesU, this.bufferU, false, null);
            this.queue.WriteToBuffer(particlesA, this.bufferA, false, null);
            this.queue.WriteToBuffer(this.particlesD, this.bufferD, false, null);

            // Empty the pending input particles.
            this.inputParticles.Clear();

            // Preparation is done; reset the prepare step to a no-op.
            this.prepare = () => { };

            // Wait until everything enqueued so far has completed.
            queue.Finish();
        };
    };
}
示例10: Screenshot
/// <summary>
/// Renders a screenshot tile by tile on the device and assembles the tiles into a
/// 24-bit BGR bitmap. Progress is reported through <paramref name="displayInformation"/>.
/// </summary>
/// <param name="screenshotHeight">Height of the screenshot in pixels; the width is derived from ScreenshotAspectRatio.</param>
/// <param name="slowRenderPower">Multiplier applied to the kernel's tile size and passed on to the renderer.</param>
/// <param name="displayInformation">Callback that receives progress messages.</param>
/// <returns>The rendered bitmap, or null when a bitmap of the requested size cannot be created.</returns>
public Bitmap Screenshot(int screenshotHeight, int slowRenderPower, Action<string> displayInformation)
{
    displayInformation("Rendering screenshot");
    var ccontext = _kernel.ComputeContext;
    _kernelInUse++;
    try
    {
        var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
        Bitmap bmp;
        try
        {
            bmp = new Bitmap(screenshotWidth, screenshotHeight, PixelFormat.Format24bppRgb);
        }
        catch (ArgumentException)
        {
            MessageBox.Show("Image size too big", "Error");
            return null;
        }

        var nancount = 0;
        var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
        try
        {
            var scan0 = bmpData.Scan0.ToInt64();

            // The queue and the tile buffer wrap native handles; release them when done.
            using (var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None))
            {
                var localSize = _kernel.Threadsize(queue);
                for (var i = 0; i < localSize.Length; i++)
                {
                    localSize[i] *= slowRenderPower;
                }

                using (var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, localSize[0] * localSize[1]))
                {
                    const int numFrames = 200;
                    var frameDependantControls = _parameters as IFrameDependantControl;
                    var framesToRender = frameDependantControls == null ? 1 : numFrames;

                    // Number of tiles needed in each direction (rounded up).
                    var totalYs = (screenshotHeight + localSize[1] - 1) / localSize[1];
                    var totalXs = (screenshotWidth + localSize[0] - 1) / localSize[0];
                    var stopwatch = new Stopwatch();
                    for (var y = 0; y < totalYs; y++)
                    {
                        for (var x = 0; x < totalXs; x++)
                        {
                            stopwatch.Restart();
                            for (var frame = 0; frame < framesToRender; frame++)
                            {
                                if (frameDependantControls != null)
                                {
                                    frameDependantControls.Frame = frame;
                                }
                                displayInformation(string.Format("Screenshot {0}% done", 100 * (y * totalXs * framesToRender + x * framesToRender + frame) / (totalXs * totalYs * framesToRender)));
                                _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight), slowRenderPower, new Size(x, y), (int)localSize[0]);
                            }

                            var pixels = new Vector4[localSize[0] * localSize[1]];
                            queue.ReadFromBuffer(computeBuffer, ref pixels, true, 0, 0, localSize[0] * localSize[1], null);
                            queue.Finish();

                            // Fold this tile's per-frame time into the running average.
                            stopwatch.Stop();
                            var elapsed = stopwatch.Elapsed.TotalMilliseconds / framesToRender;
                            _kernel.AverageKernelTime = (elapsed + _kernel.AverageKernelTime * 4) / 5;

                            // Tiles on the right and bottom edges may be clipped by the image bounds.
                            var blockWidth = Math.Min(localSize[0], screenshotWidth - x * localSize[0]);
                            var blockHeight = Math.Min(localSize[1], screenshotHeight - y * localSize[1]);
                            var intPixels = new byte[blockWidth * blockHeight * 3];
                            for (var py = 0; py < blockHeight; py++)
                            {
                                for (var px = 0; px < blockWidth; px++)
                                {
                                    // NOTE(review): the row stride used here is localSize[1]; if a tile row
                                    // is localSize[0] pixels wide this only works for square tiles.
                                    // Confirm against _kernel.Render before changing.
                                    var pixel = pixels[py * localSize[1] + px];
                                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                                    {
                                        nancount++;
                                    }

                                    // BGR; NaN channels are left at zero.
                                    if (float.IsNaN(pixel.Z) == false)
                                    {
                                        intPixels[(py * blockWidth + px) * 3 + 0] = (byte)(pixel.Z * 255);
                                    }
                                    if (float.IsNaN(pixel.Y) == false)
                                    {
                                        intPixels[(py * blockWidth + px) * 3 + 1] = (byte)(pixel.Y * 255);
                                    }
                                    if (float.IsNaN(pixel.X) == false)
                                    {
                                        intPixels[(py * blockWidth + px) * 3 + 2] = (byte)(pixel.X * 255);
                                    }
                                }
                            }

                            // Copy the tile into the locked bitmap one scan line at a time.
                            for (var line = 0; line < blockHeight; line++)
                            {
                                Marshal.Copy(intPixels, line * (int)blockWidth * 3, new IntPtr(scan0 + ((y * localSize[1] + line) * bmpData.Stride) + x * localSize[0] * 3), (int)blockWidth * 3);
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            // Never leave the bitmap locked, even when rendering fails.
            bmp.UnlockBits(bmpData);
        }

        if (nancount != 0)
        {
            MessageBox.Show(string.Format("Caught {0} NAN pixels while taking screenshot", nancount), "Warning");
        }
        return bmp;
    }
    finally
    {
        // Balance the increment on every exit path, including the early
        // "image too big" return and exceptions.
        _kernelInUse--;
    }
}
示例11: Calculate
public static void Calculate(List<Calculation> calculations)
{
Stopwatch s = new Stopwatch();
s.Start();
int count = calculations.Count;
IntVec2[] p_p = new IntVec2[count];
IntVec2[] p_a = new IntVec2[count];
IntVec2[] p_b = new IntVec2[count];
IntVec2[] p_c = new IntVec2[count];
FloatVec3[] c = new FloatVec3[count];
int[] c_valid = new int[count];
Parallel.For(0, count, i =>
{
var calc = calculations[i];
p_p[i] = new IntVec2(calc.P);
p_a[i] = new IntVec2(calc.A);
p_b[i] = new IntVec2(calc.B);
p_c[i] = new IntVec2(calc.C);
});
mark(s, "memory init");
ComputeBuffer<IntVec2> _p_p = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_p);
ComputeBuffer<IntVec2> _p_a = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_a);
ComputeBuffer<IntVec2> _p_b = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_b);
ComputeBuffer<IntVec2> _p_c = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_c);
ComputeBuffer<FloatVec3> _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length);
ComputeBuffer<int> _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length);
mark(s, "memory buffer init");
ComputeKernel kernel = program.CreateKernel("Barycentric");
kernel.SetMemoryArgument(0, _p_p);
kernel.SetMemoryArgument(1, _p_a);
kernel.SetMemoryArgument(2, _p_b);
kernel.SetMemoryArgument(3, _p_c);
kernel.SetMemoryArgument(4, _c);
kernel.SetMemoryArgument(5, _c_valid);
mark(s, "memory init 2");
ComputeEventList eventList = new ComputeEventList();
ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
commands.Execute(kernel, null, new long[] { count }, null, eventList);
mark(s, "execute");
commands.ReadFromBuffer(_c, ref c, false, eventList);
commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
commands.Finish();
mark(s, "read 1");
Parallel.For(0, count, i =>
{
var calc = calculations[i];
calc.Coords = new BarycentricCoordinates(c[i].U,c[i].V,c[i].W);
if (c_valid[i] == 1)
{
lock (calc.Tri)
calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
}
});
mark(s, "read 2");
// cleanup commands
commands.Dispose();
// cleanup events
foreach (ComputeEventBase eventBase in eventList)
{
eventBase.Dispose();
}
eventList.Clear();
// cleanup kernel
kernel.Dispose();
_p_p.Dispose();
_p_a.Dispose();
//.........这里部分代码省略.........
示例12: Main
static void Main(string[] args)
{
#region
const string programName = "Prime Number";
Stopwatch stopWatch = new Stopwatch();
string clProgramSource = KernelProgram();
Console.WriteLine("Environment OS:");
Console.WriteLine("-----------------------------------------");
Console.WriteLine(Environment.OSVersion);
#endregion
if (ComputePlatform.Platforms.Count == 0)
{
Console.WriteLine("No OpenCL Platforms are availble!");
}
else
{
#region 1
// step 1 choose the first available platform
ComputePlatform platform = ComputePlatform.Platforms[0];
// output the basic info
BasicInfo(platform);
Console.WriteLine("Program: " + programName);
Console.WriteLine("-----------------------------------------");
#endregion
//Cpu 10 seconds Gpu 28 seconds
int count = 64;
int[] output_Z = new int[count * count * count];
int[] input_X = new int[count * count * count];
for (int x = 0; x < count * count * count; x++)
{
input_X[x] = x;
}
#region 2
// step 2 create context for that platform and all devices
ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero);
// step 3 create and build program
ComputeProgram program = new ComputeProgram(context, clProgramSource);
program.Build(platform.Devices, null, null, IntPtr.Zero);
#endregion
// step 4 create memory objects
ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X);
ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length);
// step 5 create kernel object with same kernel programe name VectorAdd
ComputeKernel kernel = program.CreateKernel("PrimeNumber");
// step 6 set kernel arguments
//kernel.SetMemoryArgument(0, a);
kernel.SetMemoryArgument(0, a);
kernel.SetMemoryArgument(1, z);
ComputeEventList eventList = new ComputeEventList();
//for (int j = 0; j < context.Devices.Count; j++)
// query available devices n,...,1,0. cpu first then gpu
for (int j = context.Devices.Count-1; j > -1; j--)
{
#region 3
stopWatch.Start();
// step 7 create command queue on that context on that device
ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None);
// step 8 run the kernel program
commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);
//Application.DoEvents();
#endregion
// step 9 read results
commands.ReadFromBuffer(z, ref output_Z, false, eventList);
#region 4
commands.Finish();
string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
StreamWriter file = new StreamWriter(fileName, true);
FileInfo info = new FileInfo(fileName);
long fs = info.Length;
// 1 MegaByte = 1.049e+6 Byte
int index = 1;
if (fs == 1.049e+6)
{
fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
file = new System.IO.StreamWriter(fileName, true);
index++;
}
#endregion
for (uint xx = 0; xx < count * count * count; xx++)
//.........这里部分代码省略.........
示例13: Run
/// <summary>
/// Adds two arrays of ten random floats on the device with the "VectorAdd" kernel
/// and writes every sum to <paramref name="log"/>. Any exception is written to the
/// log instead of being propagated.
/// </summary>
/// <param name="context">OpenCL context; its first device runs the kernel.</param>
/// <param name="log">Receives the results and any error text.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // Create the arrays and fill them with random data.
        int count = 10;
        float[] arrA = new float[count];
        float[] arrB = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
            arrB[i] = (float)(rand.NextDouble() * 100);
        }

        // Create the input buffers and fill them with data from the arrays.
        // Access modifiers should match those in a kernel.
        // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
        // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
        // "using" releases the buffers even when a later step throws.
        using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
        using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
        using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
        {
            // Create and build the opencl program.
            program = new ComputeProgram(context, clProgramSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
                {
                    kernel.SetMemoryArgument(0, a);
                    kernel.SetMemoryArgument(1, b);
                    kernel.SetMemoryArgument(2, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    ComputeEventList eventList = new ComputeEventList();

                    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                    {
                        try
                        {
                            // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                            // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                            commands.Execute(kernel, null, new long[] { count }, null, eventList);

                            // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
                            // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
                            // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                            // eventList will contain two events after this method returns.
                            commands.ReadFromBuffer(c, ref arrC, false, eventList);

                            // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                            // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
                            // to finish has to be issued before "arrC" can be used.
                            // This explicit synchronization can be achieved in two ways:
                            // 1) Wait for the events in the list to finish,
                            //eventList.Wait();
                            // 2) Or simply use
                            commands.Finish();

                            // Print the results to a log/console.
                            for (int i = 0; i < count; i++)
                            {
                                log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
                            }
                        }
                        finally
                        {
                            // cleanup events
                            foreach (ComputeEventBase eventBase in eventList)
                            {
                                eventBase.Dispose();
                            }
                            eventList.Clear();
                        }
                    }
                }
            }
            finally
            {
                // cleanup program
                program.Dispose();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
示例14: GetScreenshot
/// <summary>
/// Renders the scene for the given camera into a device buffer, reads the pixels
/// back and converts them into a 32-bit RGB bitmap.
/// </summary>
/// <param name="camera">Camera settings (position, look-at, up, field of view, focal distance, frame).</param>
/// <param name="screenshotHeight">Height of the screenshot in pixels; the width is derived from ScreenshotAspectRatio.</param>
/// <param name="slowRender">Number of render passes per frame; also forwarded to the renderer.</param>
/// <returns>The rendered bitmap. Pixels with a NaN channel are left at zero and reported on the console.</returns>
public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
{
    var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
    var pixels = new Vector4[screenshotWidth * screenshotHeight];

    // "using" releases the buffer and the queue even when rendering throws.
    using (var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
    using (var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None))
    {
        var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

        // The camera vectors do not change between passes; build them once.
        var position = new Vector4((Vector3)camera.Position);
        var lookat = new Vector4((Vector3)camera.Lookat);
        var up = new Vector4((Vector3)camera.Up);

        for (var i = 0; i < slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, position, lookat, up, i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }
        for (var i = 0; i < camera.Frame * slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, position, lookat, up, i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
        queue.Finish();
    }

    var bmp = new Bitmap(screenshotWidth, screenshotHeight);
    var destBuffer = new int[screenshotWidth * screenshotHeight];
    for (var y = 0; y < screenshotHeight; y++)
    {
        for (var x = 0; x < screenshotWidth; x++)
        {
            var pixel = pixels[x + y * screenshotWidth];
            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
            {
                Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                continue;
            }

            // Pack the channels as 0x00RRGGBB.
            destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
        }
    }

    var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    try
    {
        Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
    }
    finally
    {
        // Never leave the bitmap locked, even when the copy fails.
        bmp.UnlockBits(bmpData);
    }

    return bmp;
}
示例15: Run
public void Run(ComputeContext context, TextWriter log)
{
try
{
// Create the arrays and fill them with random data.
int count = 640*480; //
float[] arrA = new float[count];
float[] arrB = new float[count];
float[] arrC = new float[count];
Random rand = new Random();
for (int i = 0; i < count; i++)
{
arrA[i] = (float)(rand.NextDouble() * 100);
arrB[i] = (float)(rand.NextDouble() * 100);
}
// Create and build the OpenCL program, then create the input buffer and fill it with data from the array.
// Access modifiers should match those in the kernel.
// CopyHostPointer means the buffer should be filled with the data provided in the last argument.
program = new ComputeProgram(context, clProgramSource);
program.Build(null, null, null, IntPtr.Zero);
ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
//ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);
// The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);
// The OpenCL program has already been created and built above.
// Create the kernel function; its arguments are set inside the timing loop below.
ComputeKernel kernel = program.CreateKernel("CompareGPUCPU");
DateTime ExecutionStartTime; //Var will hold Execution Starting Time
DateTime ExecutionStopTime;//Var will hold Execution Stopped Time
TimeSpan ExecutionTime;//Var will count Total Execution Time-Our Main Hero
ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
int repeatTimes = 100;
for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
{
kernel.SetMemoryArgument(0, a);
//kernel.SetMemoryArgument(1, b);
//kernel.SetMemoryArgument(2, c);
kernel.SetMemoryArgument(1, c);
// Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
// Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
// For this reason their use should be avoided if possible.
//ComputeEventList eventList = new ComputeEventList();
// The command queue ("commands", created once before this loop) is used to control kernel execution and manage read/write/copy operations.
// Execute the kernel "count" times. If an event list were passed, it would contain an event associated with this command after the call returns.
// If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created; this example passes null.
//commands.Execute(kernel, null, new long[] { count }, null, eventList);
commands.Execute(kernel, null, new long[] { count }, null, null);
// Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
// will not execute until any previous events in a supplied event list are marked as complete
// by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
// This example passes null instead of an event list (see the commented-out call below), so no events are collected.
//commands.ReadFromBuffer(c, ref arrC, false, eventList);
commands.ReadFromBuffer(c, ref arrC, false, null);
// A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
// in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
// to finish has to be issued before "arrC" can be used.
// This explicit synchronization can be achieved in two ways:
// 1) Wait for the events in the list to finish,
//eventList.Wait();
// 2) Or simply use
commands.Finish();
}
ExecutionStopTime = DateTime.Now;
ExecutionTime = ExecutionStopTime - ExecutionStartTime;
double perTaskTime = ExecutionTime.TotalMilliseconds / repeatTimes;
log.WriteLine("Use {0} ms using GPU", perTaskTime);
// Do that using CPU
/*
ExecutionStartTime = DateTime.Now; //Gets the system Current date time expressed as local time
for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
{
for (int i = 0; i < count; i++)
{
//arrC[i] = arrA[i] + arrB[i];
int j;
for (j = 0; j < 330 * 10; j++)
arrC[i] = arrA[i] + j;
}
}
ExecutionStopTime = DateTime.Now;
//.........这里部分代码省略.........