本文整理汇总了C#中GThread.AllocateShared方法的典型用法代码示例。如果您正苦于以下问题:C# GThread.AllocateShared方法的具体用法?C# GThread.AllocateShared怎么用?C# GThread.AllocateShared使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类GThread的用法示例。
在下文中一共展示了GThread.AllocateShared方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C#代码示例。
示例1: histo_kernel
/// <summary>
/// GPU kernel that builds a 256-bin byte histogram of <paramref name="buffer"/>.
/// Each block first accumulates into a per-block shared-memory histogram, then
/// merges its bins into the global <paramref name="histo"/> with atomic adds.
/// Assumes the kernel is launched with 256 threads per block.
/// </summary>
public static void histo_kernel(GThread thread, byte[] buffer, int size, uint[] histo)
{
    // Per-block shared accumulator: one bin per thread, so a single
    // write per thread is enough to zero the whole buffer.
    uint[] localBins = thread.AllocateShared<uint>("temp", 256);
    localBins[thread.threadIdx.x] = 0;
    thread.SyncThreads();

    // Each thread starts at its global index and steps by the total
    // number of launched threads, so the grid covers all of buffer.
    int step = thread.blockDim.x * thread.gridDim.x;
    for (int idx = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
         idx < size;
         idx += step)
    {
        thread.atomicAdd(ref localBins[buffer[idx]], 1);
    }

    // Wait for every thread's shared-memory writes to land, then fold this
    // block's bins into the global histogram — one atomic add per thread.
    thread.SyncThreads();
    thread.atomicAdd(ref (histo[thread.threadIdx.x]), localBins[thread.threadIdx.x]);
}
示例2: Product
/// <summary>
/// Dot-product kernel over <paramref name="a"/> and <paramref name="b"/>:
/// each block computes a partial sum and writes it to c[blockIdx.x].
/// The host is expected to add the per-block partials together.
/// </summary>
public static void Product(GThread thread, int[] a, int[] b, int[] c)
{
    // Shared buffer holding one partial sum per thread in this block.
    int[] partial = thread.AllocateShared<int>("cache", 4);
    int localIdx = thread.threadIdx.x;

    // Accumulate this thread's share of the element-wise products,
    // striding by the total number of launched threads.
    int acc = 0;
    for (int idx = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
         idx < N;
         idx += thread.blockDim.x * thread.gridDim.x)
    {
        acc = acc + a[idx] * b[idx];
    }
    partial[localIdx] = acc;
    thread.SyncThreads();

    // Pairwise tree reduction down to partial[0]
    // (blockDim.x must be a power of two for this halving scheme).
    for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
    {
        if (localIdx < half)
        {
            partial[localIdx] += partial[localIdx + half];
        }
        thread.SyncThreads();
    }

    // Thread 0 publishes the block's partial result.
    if (localIdx == 0)
    {
        c[thread.blockIdx.x] = partial[0];
    }
}
示例3: Dot
/// <summary>
/// Float dot-product kernel: computes per-block partial sums of
/// a[i] * b[i] and writes block i's partial into c[i]. The host sums
/// the per-block partials to obtain the final dot product.
/// </summary>
public static void Dot(GThread thread, float[] a, float[] b, float[] c)
{
    float[] scratch = thread.AllocateShared<float>("cache", threadsPerBlock);
    int lane = thread.threadIdx.x;

    // Grid-stride accumulation: each thread sums its share of the products.
    float acc = 0;
    for (int idx = lane + thread.blockIdx.x * thread.blockDim.x;
         idx < N;
         idx += thread.blockDim.x * thread.gridDim.x)
    {
        acc += a[idx] * b[idx];
    }

    // Park this thread's partial in shared memory, then wait for the
    // whole block before reducing.
    scratch[lane] = acc;
    thread.SyncThreads();

    // Pairwise tree reduction; threadsPerBlock must be a power of 2
    // for the successive halving to cover every element.
    for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
    {
        if (lane < half)
            scratch[lane] += scratch[lane + half];
        thread.SyncThreads();
    }

    if (lane == 0)
        c[thread.blockIdx.x] = scratch[0];
}
示例4: ExplorePermutationsKernel
/// <summary>
/// Evaluates one 3x3 tile permutation per thread and reduces to the best
/// (lowest-metric) evaluation per block, which thread 0 writes to
/// evaluations[blockIdx.x].
/// NOTE(review): assumes a launch of 256 threads per block (shared arrays
/// are sized 256) — confirm against the launch configuration.
/// </summary>
public static void ExplorePermutationsKernel(GThread gThread, Evaluation[] evaluations)
{
// Shared scratch: one Evaluation per thread, plus a 9-entry tile ordering
// per thread (v[t, 0..8]).
var blockEvaluations = gThread.AllocateShared<Evaluation>("be", 256);
var v = gThread.AllocateShared<byte>("v", 256, 9);
var t = gThread.threadIdx.x;
// Global permutation index handled by this thread.
var permutation = gThread.blockIdx.x * gThread.blockDim.x + gThread.threadIdx.x;
// Tile positions within the 3x3 grid:
// 0 1 2
// 3 4 5
// 6 7 8
// Decode this thread's permutation into a tile ordering in v[t, *].
TileOrderFromPermutation(Permutations, permutation, 9, v, t);
// Sum seam-fit scores over all six horizontal and six vertical adjacencies.
var metric = 0f;
metric += LeftRightFit[v[t, 0], v[t, 1]] + LeftRightFit[v[t, 1], v[t, 2]];
metric += LeftRightFit[v[t, 3], v[t, 4]] + LeftRightFit[v[t, 4], v[t, 5]];
metric += LeftRightFit[v[t, 6], v[t, 7]] + LeftRightFit[v[t, 7], v[t, 8]];
metric += TopBottomFit[v[t, 0], v[t, 3]] + TopBottomFit[v[t, 3], v[t, 6]];
metric += TopBottomFit[v[t, 1], v[t, 4]] + TopBottomFit[v[t, 4], v[t, 7]];
metric += TopBottomFit[v[t, 2], v[t, 5]] + TopBottomFit[v[t, 5], v[t, 8]];
blockEvaluations[t].Permutation = permutation;
blockEvaluations[t].Metric = metric;
gThread.SyncThreads();
// Tree reduction: each round keeps the entry with the SMALLER metric,
// so blockEvaluations[0] ends up holding the block's best candidate.
for (var i = 256 / 2; i > 0; i /= 2)
{
if (t < i)
{
if (blockEvaluations[t].Metric > blockEvaluations[t + i].Metric)
{
blockEvaluations[t] = blockEvaluations[t + i];
}
}
gThread.SyncThreads();
}
// Thread 0 publishes the block winner to global memory.
if (gThread.threadIdx.x == 0)
{
evaluations[gThread.blockIdx.x] = blockEvaluations[0];
}
}
示例5: ComputeFitsKernel
/// <summary>
/// Computes the sum of squared differences between edge <paramref name="edgeIndexA"/>
/// of tile blockIdx.x and edge <paramref name="edgeIndexB"/> of tile blockIdx.y,
/// storing the result in fit[tileA, tileB]. One thread per edge pixel (64 total).
/// </summary>
public static void ComputeFitsKernel(GThread gThread, int edgeIndexA, int edgeIndexB, float[,] fit)
{
    var squares = gThread.AllocateShared<float>("sum", 64);
    var tileA = gThread.blockIdx.x;
    var tileB = gThread.blockIdx.y;
    var px = gThread.threadIdx.x;

    // Each thread contributes one squared per-pixel difference.
    var d = Edges[tileA, edgeIndexA, px] - Edges[tileB, edgeIndexB, px];
    squares[px] = d * d;
    gThread.SyncThreads();

    // Tree-reduce the 64 partial squares down into squares[0].
    for (var half = 64 / 2; half > 0; half /= 2)
    {
        if (px < half)
        {
            squares[px] += squares[px + half];
        }
        gThread.SyncThreads();
    }

    // Thread 0 writes the block's total to the global fit matrix.
    if (px == 0)
    {
        fit[tileA, tileB] = squares[0];
    }
}
示例6: GpuFindPathDistance
/// <summary>
/// Placeholder path-distance kernel: every thread records a candidate
/// (distance, path number) in shared memory and thread 0 publishes the
/// block's first entry to answer[blockIdx.x].
/// NOTE(review): distance/pathNo are stubbed from gridDim/blockDim rather
/// than computed — presumably to be replaced by a real search. The original
/// also contained a dead busy-work loop whose accumulator (sum) was never
/// read; it has been removed as it had no observable effect.
/// </summary>
public static void GpuFindPathDistance(GThread thread, AnswerStruct[] answer)
{
    var answerLocal = thread.AllocateShared<AnswerStruct>("ansL", ThreadsPerBlock);

    // Stub values — not real path distances.
    var bestDistance = thread.gridDim.x;
    var bestPermutation = thread.blockDim.x;

    answerLocal[thread.threadIdx.x].distance = bestDistance;
    answerLocal[thread.threadIdx.x].pathNo = bestPermutation;
    thread.SyncThreads();

    // Thread 0 copies the block's entry out to global memory.
    if (thread.threadIdx.x == 0)
    {
        answer[thread.blockIdx.x] = answerLocal[0];
    }
}
示例7: copy2D
/// <summary>
/// Copies one row of the constant 2-D array into shared memory, scaling each
/// element by the array's rank (2 for a [,] array), and mirrors the values
/// into the flattened result buffer. Row index comes from blockIdx.x.
/// </summary>
public static void copy2D(GThread thread, int[] result)
{
    int[,] staging = thread.AllocateShared<int>("cache", XSIZE, YSIZE);
    int row = thread.blockIdx.x;

    for (int col = 0; col < YSIZE; col++)
    {
        staging[row, col] = Constant2D[row, col] * Constant2D.Rank;
        result[row * YSIZE + col] = staging[row, col];
    }
}
示例8: VectorAdd
/// <summary>
/// Element-wise kernel: c[i] = (a[i] + b[i]) * ConstantMemory[i], with one
/// element handled per thread across the launch grid.
/// </summary>
public static void VectorAdd(GThread thread,
    [CudafyAddressSpace(eCudafyAddressSpace.Global)] int[] a,
    int[] b,
    int[] c)
{
    // Shared allocation kept from the original sample; its contents are
    // never read — presumably it exists to demonstrate AllocateShared.
    int[] shared = thread.AllocateShared<int>("shared", Program.N);

    int gid = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;
    c[gid] = (a[gid] + b[gid]) * ConstantMemory[gid];
    thread.SyncThreads();
}