C++ SharedMemory::getPointer方法代码示例

本文整理汇总了C++中SharedMemory::getPointer方法的典型用法代码示例。如果您正苦于以下问题：C++ SharedMemory::getPointer方法的具体用法？C++ SharedMemory::getPointer怎么用？C++ SharedMemory::getPointer使用的例子？那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SharedMemory的用法示例。

在下文中一共展示了SharedMemory::getPointer方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: susanKernel

__global__
void susanKernel(T* out, const T* in,
                 const unsigned idim0, const unsigned idim1,
                 const unsigned radius, const float t, const float g,
                 const unsigned edge)
{
    const int rSqrd   = radius*radius;
    const int windLen = 2*radius+1;
    const int shrdLen = BLOCK_X + windLen-1;

    SharedMemory<T> shared;
    T* shrdMem = shared.getPointer();

    const unsigned lx = threadIdx.x;
    const unsigned ly = threadIdx.y;
    const unsigned gx  = blockDim.x * blockIdx.x + lx + edge;
    const unsigned gy  = blockDim.y * blockIdx.y + ly + edge;

    const unsigned nucleusIdx = (ly+radius)*shrdLen + lx+radius;
    shrdMem[nucleusIdx] = gx<idim0 && gy<idim1 ? in[gy*idim0+gx] : 0;
    T m_0 = shrdMem[nucleusIdx];

#pragma unroll
    for (int b=ly, gy2=gy; b<shrdLen; b+=BLOCK_Y, gy2+=BLOCK_Y) {
        int j = gy2-radius;
#pragma unroll
        for (int a=lx, gx2=gx; a<shrdLen; a+=BLOCK_X, gx2+=BLOCK_X) {
            int i = gx2-radius;
            shrdMem[b*shrdLen+a] = (i<idim0 && j<idim1 ? in[j*idim0+i]: m_0);
        }
    }
    __syncthreads();

    if (gx < idim0 - edge && gy < idim1 - edge) {
        unsigned idx = gy*idim0 + gx;
        float nM  = 0.0f;
#pragma unroll
        for (int p=0; p<windLen; ++p) {
#pragma unroll
            for (int q=0; q<windLen; ++q) {
                int i = p - radius;
                int j = q - radius;
                int a = lx + radius + i;
                int b = ly + radius + j;
                if (i*i + j*j < rSqrd) {
                    float c = m_0;
                    float m = shrdMem[b * shrdLen + a];
                    float exp_pow = powf((m - c)/t, 6.0f);
                    float cM = expf(-exp_pow);
                    nM += cM;
                }
            }
        }
        out[idx] = nM < g ? g - nM : T(0);
    }
}

开发者ID:rotorliu，项目名称:arrayfire，代码行数:56，代码来源:susan.hpp

示例2: histogramKernel

static __global__
void histogramKernel(Param<outType> out, CParam<inType> in,
                     int len, int nbins, float minval, float maxval, int nBBS)
{
    SharedMemory<outType> shared;
    outType * shrdMem = shared.getPointer();

    // offset input and output to account for batch ops
    unsigned b2 = blockIdx.x / nBBS;
    const inType *iptr  =  in.ptr + b2 *  in.strides[2] + blockIdx.y *  in.strides[3];
    outType      *optr  = out.ptr + b2 * out.strides[2] + blockIdx.y * out.strides[3];

    int start  = (blockIdx.x-b2*nBBS) * THRD_LOAD * blockDim.x + threadIdx.x;
    int end    = minimum((start + THRD_LOAD * blockDim.x), len);
    float step = (maxval-minval) / (float)nbins;

    // If nbins > max shared memory allocated, then just use atomicAdd on global memory
    bool use_global = nbins > MAX_BINS;

    // Skip initializing shared memory
    if (!use_global) {
        for (int i = threadIdx.x; i < nbins; i += blockDim.x)
            shrdMem[i] = 0;
        __syncthreads();
    }

    for (int row = start; row < end; row += blockDim.x) {
        int idx = isLinear ? row : ((row % in.dims[0]) + (row / in.dims[0])*in.strides[1]);
        int bin = (int)((iptr[idx] - minval) / step);
        bin     = (bin < 0)      ? 0         : bin;
        bin     = (bin >= nbins) ? (nbins-1) : bin;

        if (use_global) {
            atomicAdd((optr + bin), 1);
        } else {
            atomicAdd((shrdMem + bin), 1);
        }
    }

    // No need to write to global if use_global is true
    if (!use_global) {
        __syncthreads();
        for (int i = threadIdx.x; i < nbins; i += blockDim.x) {
            atomicAdd((optr + i), shrdMem[i]);
        }
    }
}

开发者ID:AshwinRajendraprasad，项目名称:arrayfire，代码行数:47，代码来源:histogram.hpp

示例3: morphKernel

static __global__ void morphKernel(Param<T> out, CParam<T> in,
                                   int nBBS0, int nBBS1)
{
    // get shared memory pointer
    SharedMemory<T> shared;
    T * shrdMem = shared.getPointer();

    // calculate necessary offset and window parameters
    const int halo   = windLen/2;
    const int padding= 2*halo;
    const int shrdLen  = blockDim.x + padding + 1;
    const int shrdLen1 = blockDim.y + padding;

    // gfor batch offsets
    unsigned b2 = blockIdx.x / nBBS0;
    unsigned b3 = blockIdx.y / nBBS1;
    const T* iptr    = (const T *) in.ptr + (b2 *  in.strides[2] + b3 *  in.strides[3]);
    T*       optr    = (T *      )out.ptr + (b2 * out.strides[2] + b3 * out.strides[3]);

    const int lx = threadIdx.x;
    const int ly = threadIdx.y;

    // global indices
    const int gx = blockDim.x * (blockIdx.x-b2*nBBS0) + lx;
    const int gy = blockDim.y * (blockIdx.y-b3*nBBS1) + ly;

    // pull image to local memory
    for (int b=ly, gy2=gy; b<shrdLen1; b+=blockDim.y, gy2+=blockDim.y) {
        // move row_set get_local_size(1) along coloumns
        for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) {
            load2ShrdMem<T, isDilation>(shrdMem, iptr, a, b, shrdLen,
                                        in.dims[0], in.dims[1],
                                        gx2-halo, gy2-halo, in.strides[1], in.strides[0]);
        }
    }

    int i = lx + halo;
    int j = ly + halo;

    __syncthreads();

    const T * d_filt = (const T *)cFilter;
    T acc = isDilation ? Binary<T, af_max_t>().init() : Binary<T, af_min_t>().init();
#pragma unroll
    for(int wj=0; wj<windLen; ++wj) {
        int joff   = wj*windLen;
        int w_joff = (j+wj-halo)*shrdLen;
#pragma unroll
        for(int wi=0; wi<windLen; ++wi) {
            if (d_filt[joff+wi] > (T)0) {
                T cur  = shrdMem[w_joff + (i+wi-halo)];
                if (isDilation)
                    acc = max(acc, cur);
                else
                    acc = min(acc, cur);
            }
        }
    }

    if (gx<in.dims[0] && gy<in.dims[1]) {
        int outIdx = lIdx(gx, gy, out.strides[1], out.strides[0]);
        optr[outIdx] = acc;
    }
}

开发者ID:munnybearz，项目名称:arrayfire，代码行数:64，代码来源:morph.hpp

示例4: max

static __global__ void morph3DKernel(Param<T> out, CParam<T> in, int nBBS)
{
    // get shared memory pointer
    SharedMemory<T> shared;
    T * shrdMem = shared.getPointer();

    const int halo      = windLen/2;
    const int padding   = 2*halo;

    const int se_area   = windLen*windLen;
    const int shrdLen   = blockDim.x + padding + 1;
    const int shrdLen1  = blockDim.y + padding;
    const int shrdLen2  = blockDim.z + padding;
    const int shrdArea  = shrdLen * (blockDim.y+padding);

    // gfor batch offsets
    unsigned batchId = blockIdx.x / nBBS;

    const T* iptr    = (const T *) in.ptr + (batchId *  in.strides[3]);
    T*       optr    = (T *      )out.ptr + (batchId * out.strides[3]);

    const int lx = threadIdx.x;
    const int ly = threadIdx.y;
    const int lz = threadIdx.z;

    const int gx = blockDim.x * (blockIdx.x-batchId*nBBS) + lx;
    const int gy = blockDim.y * blockIdx.y + ly;
    const int gz = blockDim.z * blockIdx.z + lz;

    for (int c=lz, gz2=gz; c<shrdLen2; c+=blockDim.z, gz2+=blockDim.z) {
        for (int b=ly, gy2=gy; b<shrdLen1; b+=blockDim.y, gy2+=blockDim.y) {
            for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) {
                load2ShrdVolume<T, isDilation>(shrdMem, iptr, a, b, c, shrdLen, shrdArea,
                                               in.dims[0], in.dims[1], in.dims[2],
                                               gx2-halo, gy2-halo, gz2-halo,
                                               in.strides[2], in.strides[1], in.strides[0]);
            }
        }
    }

    __syncthreads();
    // indices of voxel owned by current thread
    int i  = lx + halo;
    int j  = ly + halo;
    int k  = lz + halo;

    const T * d_filt = (const T *)cFilter;
    T acc = isDilation ? Binary<T, af_max_t>().init() : Binary<T, af_min_t>().init();
#pragma unroll
    for(int wk=0; wk<windLen; ++wk) {
        int koff   = wk*se_area;
        int w_koff = (k+wk-halo)*shrdArea;
#pragma unroll
        for(int wj=0; wj<windLen; ++wj) {
        int joff   = wj*windLen;
        int w_joff = (j+wj-halo)*shrdLen;
#pragma unroll
            for(int wi=0; wi<windLen; ++wi) {
                if (d_filt[koff+joff+wi]) {
                    T cur  = shrdMem[w_koff+w_joff + i+wi-halo];
                    if (isDilation)
                        acc = max(acc, cur);
                    else
                        acc = min(acc, cur);
                }
            }
        }
    }

    if (gx<in.dims[0] && gy<in.dims[1] && gz<in.dims[2]) {
        int outIdx = gz * out.strides[2] +
                          gy * out.strides[1] +
                          gx * out.strides[0];
        optr[outIdx] = acc;
    }
}

开发者ID:munnybearz，项目名称:arrayfire，代码行数:76，代码来源:morph.hpp

示例5: meanshiftKernel

static __global__
void meanshiftKernel(Param<T> out, CParam<T> in,
                     float space_, int radius, float cvar,
                     uint iter, int nBBS0, int nBBS1)
{
    SharedMemory<T> shared;
    T * shrdMem = shared.getPointer();

    // calculate necessary offset and window parameters
    const int padding     = 2*radius + 1;
    const int shrdLen     = blockDim.x + padding;
    const int schStride   = shrdLen*(blockDim.y + padding);
    // the variable ichStride will only effect when we have >1
    // channels. in the other cases, the expression in question
    // will not use the variable
    const int ichStride   = in.strides[2];

    // gfor batch offsets
    unsigned b2 = blockIdx.x / nBBS0;
    unsigned b3 = blockIdx.y / nBBS1;
    const T* iptr = (const T *) in.ptr + (b2 *  in.strides[2] + b3 *  in.strides[3]);
    T*       optr = (T *      )out.ptr + (b2 * out.strides[2] + b3 * out.strides[3]);

    const int lx = threadIdx.x;
    const int ly = threadIdx.y;

    const int gx = blockDim.x * (blockIdx.x-b2*nBBS0) + lx;
    const int gy = blockDim.y * (blockIdx.y-b3*nBBS1) + ly;

    // pull image to local memory
    for (int b=ly, gy2=gy; b<shrdLen; b+=blockDim.y, gy2+=blockDim.y) {
        // move row_set get_local_size(1) along coloumns
        for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) {
            load2ShrdMem<T, channels>(shrdMem, iptr, a, b, shrdLen, schStride,
                                      in.dims[0], in.dims[1], gx2-radius, gy2-radius, ichStride,
                                      in.strides[1], in.strides[0]);
        }
    }

    int i   = lx + radius;
    int j   = ly + radius;

    __syncthreads();

    if (gx>=in.dims[0] || gy>=in.dims[1])
        return;

    float means[channels];
    float centers[channels];
    float tmpclrs[channels];

    // clear means and centers for this pixel
#pragma unroll
    for(int ch=0; ch<channels; ++ch) {
        means[ch] = 0.0f;
        centers[ch] = shrdMem[lIdx(i, j, shrdLen, 1)+ch*schStride];
    }

    // scope of meanshift iterationd begin
    for(uint it=0; it<iter; ++it) {

        int count   = 0;
        int shift_x = 0;
        int shift_y = 0;

        for(int wj=-radius; wj<=radius; ++wj) {
            int hit_count = 0;

            for(int wi=-radius; wi<=radius; ++wi) {

                int tj = j + wj;
                int ti = i + wi;

                // proceed
                float norm = 0.0f;
#pragma unroll
                for(int ch=0; ch<channels; ++ch) {
                    tmpclrs[ch] = shrdMem[lIdx(ti, tj, shrdLen, 1)+ch*schStride];
                    norm += (centers[ch]-tmpclrs[ch]) * (centers[ch]-tmpclrs[ch]);
                }

                if (norm<= cvar) {
#pragma unroll
                    for(int ch=0; ch<channels; ++ch)
                        means[ch] += tmpclrs[ch];

                    shift_x += wi;
                    ++hit_count;
                }
            }
            count+= hit_count;
            shift_y += wj*hit_count;
        }

        if (count==0) {
            break;
        }

        const float fcount = 1.f/count;
        const int mean_x = (int)(shift_x*fcount+0.5f);
//.........这里部分代码省略.........

开发者ID:victorv，项目名称:arrayfire，代码行数:101，代码来源:meanshift.hpp

示例6: bilateralKernel

static __global__
void bilateralKernel(Param<outType> out, CParam<inType> in,
                     float sigma_space, float sigma_color,
                     int gaussOff, int nBBS0, int nBBS1)
{
    SharedMemory<outType> shared;
    outType *localMem = shared.getPointer();
    outType *gauss2d  = localMem + gaussOff;

    const int radius      = max((int)(sigma_space * 1.5f), 1);
    const int padding     = 2 * radius;
    const int window_size = padding + 1;
    const int shrdLen     = THREADS_X + padding;
    const float variance_range = sigma_color * sigma_color;
    const float variance_space = sigma_space * sigma_space;

    // gfor batch offsets
    unsigned b2 = blockIdx.x / nBBS0;
    unsigned b3 = blockIdx.y / nBBS1;
    const inType* iptr  = (const inType *) in.ptr + (b2 * in.strides[2]  + b3 * in.strides[3] );
    outType*       optr = (outType *     )out.ptr + (b2 * out.strides[2] + b3 * out.strides[3]);

    int lx = threadIdx.x;
    int ly = threadIdx.y;

    const int gx = THREADS_X * (blockIdx.x-b2*nBBS0) + lx;
    const int gy = THREADS_Y * (blockIdx.y-b3*nBBS1) + ly;

    // generate gauss2d spatial variance values for block
    if (lx<window_size && ly<window_size) {
        int x = lx - radius;
        int y = ly - radius;
        gauss2d[ly*window_size+lx] = exp( ((x*x) + (y*y)) / (-2.f * variance_space));
    }

    // pull image to local memory
    for (int b=ly, gy2=gy; b<shrdLen; b+=blockDim.y, gy2+=blockDim.y) {
        // move row_set get_local_size(1) along coloumns
        for (int a=lx, gx2=gx; a<shrdLen; a+=blockDim.x, gx2+=blockDim.x) {
            load2ShrdMem<inType, outType>(localMem, iptr, a, b, shrdLen, in.dims[0], in.dims[1],
                                          gx2-radius, gy2-radius, in.strides[1], in.strides[0]);
        }
    }

    __syncthreads();

    if (gx<in.dims[0] && gy<in.dims[1]) {
        lx += radius;
        ly += radius;
        const outType center_color = localMem[ly*shrdLen+lx];
        outType res  = 0;
        outType norm = 0;
#pragma unroll
        for(int wj=0; wj<window_size; ++wj) {
            int joff = (ly+wj-radius)*shrdLen + (lx-radius);
            int goff = wj*window_size;
#pragma unroll
            for(int wi=0; wi<window_size; ++wi) {
                const outType tmp_color   = localMem[joff+wi];
                const outType gauss_range = gaussian1d(center_color - tmp_color, variance_range);
                const outType weight      = gauss2d[goff+wi] * gauss_range;
                norm += weight;
                res  += tmp_color * weight;
            }
        }
        optr[gy*out.strides[1]+gx] = res / norm;
    }
}

开发者ID:Brainiarc7，项目名称:arrayfire，代码行数:68，代码来源:bilateral.hpp

注：本文中的SharedMemory::getPointer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。