本文整理汇总了C++中GpuMat::type方法的典型用法代码示例。如果您正苦于以下问题:C++ GpuMat::type方法的具体用法?C++ GpuMat::type怎么用?C++ GpuMat::type使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类GpuMat
的用法示例。
在下文中一共展示了GpuMat::type方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: h
void cv::gpu::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
GpuMat dst = _dst.getGpuMat();
cudaStream_t stream = StreamAccessor::getStream(_stream);
if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))
{
NppiSize srcsz;
srcsz.width = src.cols;
srcsz.height = src.rows;
NppiSize dstsz;
dstsz.width = dst.cols;
dstsz.height = dst.rows;
NppStreamHandler h(stream);
switch (src.type())
{
case CV_8UC1:
{
Npp8u nVal = saturate_cast<Npp8u>(value[0]);
nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_8UC4:
{
Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};
nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_32SC1:
{
Npp32s nVal = saturate_cast<Npp32s>(value[0]);
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
case CV_32FC1:
{
Npp32f val = saturate_cast<Npp32f>(value[0]);
Npp32s nVal = *(reinterpret_cast<Npp32s_a*>(&val));
nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,
dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );
break;
}
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{
typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
static const caller_t callers[6][4] =
{
{ copyMakeBorder_caller<uchar, 1> , copyMakeBorder_caller<uchar, 2> , copyMakeBorder_caller<uchar, 3> , copyMakeBorder_caller<uchar, 4>},
{0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},
{ copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/, copyMakeBorder_caller<ushort, 3> , copyMakeBorder_caller<ushort, 4>},
{ copyMakeBorder_caller<short, 1> , 0/*copyMakeBorder_caller<short, 2>*/ , copyMakeBorder_caller<short, 3> , copyMakeBorder_caller<short, 4>},
{0/*copyMakeBorder_caller<int, 1>*/, 0/*copyMakeBorder_caller<int, 2>*/ , 0/*copyMakeBorder_caller<int, 3>*/, 0/*copyMakeBorder_caller<int , 4>*/},
{ copyMakeBorder_caller<float, 1> , 0/*copyMakeBorder_caller<float, 2>*/ , copyMakeBorder_caller<float, 3> , copyMakeBorder_caller<float ,4>}
};
caller_t func = callers[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, dst, top, left, borderType, value, stream);
}
}
示例2: pyrDown
void cv::gpu::FarnebackOpticalFlow::operator ()(
const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s)
{
CV_Assert(frame0.type() == CV_8U && frame1.type() == CV_8U);
CV_Assert(frame0.size() == frame1.size());
CV_Assert(polyN == 5 || polyN == 7);
CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6);
Stream streams[5];
if (S(s))
streams[0] = s;
Size size = frame0.size();
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
flowx.create(size, CV_32F);
flowy.create(size, CV_32F);
GpuMat flowx0 = flowx;
GpuMat flowy0 = flowy;
// Crop unnecessary levels
double scale = 1;
int numLevelsCropped = 0;
for (; numLevelsCropped < numLevels; numLevelsCropped++)
{
scale *= pyrScale;
if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)
break;
}
streams[0].enqueueConvert(frame0, frames_[0], CV_32F);
streams[1].enqueueConvert(frame1, frames_[1], CV_32F);
if (fastPyramids)
{
// Build Gaussian pyramids using pyrDown()
pyramid0_.resize(numLevelsCropped + 1);
pyramid1_.resize(numLevelsCropped + 1);
pyramid0_[0] = frames_[0];
pyramid1_[0] = frames_[1];
for (int i = 1; i <= numLevelsCropped; ++i)
{
pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);
pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);
}
}
setPolynomialExpansionConsts(polyN, polySigma);
device::optflow_farneback::setUpdateMatricesConsts();
for (int k = numLevelsCropped; k >= 0; k--)
{
streams[0].waitForCompletion();
scale = 1;
for (int i = 0; i < k; i++)
scale *= pyrScale;
double sigma = (1./scale - 1) * 0.5;
int smoothSize = cvRound(sigma*5) | 1;
smoothSize = std::max(smoothSize, 3);
int width = cvRound(size.width*scale);
int height = cvRound(size.height*scale);
if (fastPyramids)
{
width = pyramid0_[k].cols;
height = pyramid0_[k].rows;
}
if (k > 0)
{
curFlowX.create(height, width, CV_32F);
curFlowY.create(height, width, CV_32F);
}
else
{
curFlowX = flowx0;
curFlowY = flowy0;
}
if (!prevFlowX.data)
{
if (flags & OPTFLOW_USE_INITIAL_FLOW)
{
#if ENABLE_GPU_RESIZE
resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);
resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);
streams[0].enqueueConvert(curFlowX, curFlowX, curFlowX.depth(), scale);
streams[1].enqueueConvert(curFlowY, curFlowY, curFlowY.depth(), scale);
#else
Mat tmp1, tmp2;
flowx0.download(tmp1);
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
tmp2 *= scale;
curFlowX.upload(tmp2);
flowy0.download(tmp1);
resize(tmp1, tmp2, Size(width, height), 0, 0, INTER_AREA);
tmp2 *= scale;
//.........这里部分代码省略.........
示例3: void
void cv::cuda::warpPerspective(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
{
GpuMat src = _src.getGpuMat();
Mat M = _M.getMat();
CV_Assert( M.rows == 3 && M.cols == 3 );
const int interpolation = flags & INTER_MAX;
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP) ;
_dst.create(dsize, src.type());
GpuMat dst = _dst.getGpuMat();
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
static const bool useNppTab[6][4][3] =
{
{
{false, false, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
}
};
bool useNpp = borderMode == BORDER_CONSTANT && ofs.x == 0 && ofs.y == 0 && useNppTab[src.depth()][src.channels() - 1][interpolation];
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
if (useNpp)
{
typedef void (*func_t)(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& dst, double coeffs[][3], int flags, cudaStream_t stream);
static const func_t funcs[2][6][4] =
{
{
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
},
{
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
}
};
dst.setTo(borderValue, stream);
double coeffs[3][3];
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, dst, coeffs, interpolation, StreamAccessor::getStream(stream));
}
else
//.........这里部分代码省略.........
示例4: void
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, const GpuMat& train,
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
const GpuMat& mask, Stream& stream)
{
if (query.empty() || train.empty())
return;
using namespace ::cv::gpu::device::bf_knnmatch;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
int cc, cudaStream_t stream);
static const caller_t callers[3][6] =
{
{
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
},
{
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
},
{
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
}
};
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.type() == query.type() && train.cols == query.cols);
const int nQuery = query.rows;
const int nTrain = train.rows;
if (k == 2)
{
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
}
else
{
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
if (stream)
stream.enqueueMemSet(trainIdx, Scalar::all(-1));
else
trainIdx.setTo(Scalar::all(-1));
caller_t func = callers[distType][query.depth()];
CV_Assert(func != 0);
DeviceInfo info;
int cc = info.majorVersion() * 10 + info.minorVersion();
func(query, train, k, mask, trainIdx, distance, allDist, cc, StreamAccessor::getStream(stream));
}
示例5: devcopy
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost);
}
示例6: csbp_operator
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4));
const Scalar zero = Scalar::all(0);
cudaStream_t cudaStream = StreamAccessor::getStream(stream);
////////////////////////////////////////////////////////////////////////////////////////////
// Init
int rows = left.rows;
int cols = left.cols;
rthis.levels = std::min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
int levels = rthis.levels;
// compute sizes
AutoBuffer<int> buf(levels * 3);
int* cols_pyr = buf;
int* rows_pyr = cols_pyr + levels;
int* nr_plane_pyr = rows_pyr + levels;
cols_pyr[0] = cols;
rows_pyr[0] = rows;
nr_plane_pyr[0] = rthis.nr_plane;
for (int i = 1; i < levels; i++)
{
cols_pyr[i] = cols_pyr[i-1] / 2;
rows_pyr[i] = rows_pyr[i-1] / 2;
nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
}
GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected;
//allocate buffers
int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2
buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1;
buffers_count += 1; // data_cost_selected
mbuf.create(rows * rthis.nr_plane * buffers_count, cols, DataType<T>::type);
data_cost = mbuf.rowRange(0, rows * rthis.nr_plane * 2);
data_cost_selected = mbuf.rowRange(data_cost.rows, data_cost.rows + rows * rthis.nr_plane);
for(int k = 0; k < 2; ++k) // in/out
{
GpuMat sub1 = mbuf.rowRange(data_cost.rows + data_cost_selected.rows, mbuf.rows);
GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2);
GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] };
for(int _r = 0; _r < 5; ++_r)
{
*buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
}
};
size_t elem_step = mbuf.step / sizeof(T);
Size temp_size = data_cost.size();
if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp)
temp_size = Size(static_cast<int>(elem_step), rows_pyr[levels - 1] * rthis.ndisp);
temp.create(temp_size, DataType<T>::type);
////////////////////////////////////////////////////////////////////////////
// Compute
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
if (stream)
{
stream.enqueueMemSet(l[0], zero);
stream.enqueueMemSet(d[0], zero);
stream.enqueueMemSet(r[0], zero);
stream.enqueueMemSet(u[0], zero);
stream.enqueueMemSet(l[1], zero);
stream.enqueueMemSet(d[1], zero);
stream.enqueueMemSet(r[1], zero);
stream.enqueueMemSet(u[1], zero);
stream.enqueueMemSet(data_cost, zero);
stream.enqueueMemSet(data_cost_selected, zero);
}
else
{
l[0].setTo(zero);
d[0].setTo(zero);
r[0].setTo(zero);
u[0].setTo(zero);
l[1].setTo(zero);
d[1].setTo(zero);
//.........这里部分代码省略.........
示例7: void
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;
typedef void (*Caller)(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
typedef void (*MaskedCaller)(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
static Caller multipass_callers[] =
{
minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
};
static Caller singlepass_callers[] =
{
minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
};
static MaskedCaller masked_multipass_callers[] =
{
minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
};
static MaskedCaller masked_singlepass_callers[] =
{
minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
};
CV_Assert(src.depth() <= CV_64F);
CV_Assert(src.channels() == 1);
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
if (src.depth() == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
double minVal_;
if (!minVal) minVal = &minVal_;
double maxVal_;
if (!maxVal) maxVal = &maxVal_;
int minLoc_[2];
int maxLoc_[2];
Size valbuf_size, locbuf_size;
getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), valbuf_size.width,
valbuf_size.height, locbuf_size.width, locbuf_size.height);
ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
if (mask.empty())
{
Caller* callers = multipass_callers;
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
callers = singlepass_callers;
Caller caller = callers[src.type()];
CV_Assert(caller != 0);
caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
}
else
{
MaskedCaller* callers = masked_multipass_callers;
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()];
CV_Assert(caller != 0);
caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
}
if (minLoc) {
minLoc->x = minLoc_[0];
minLoc->y = minLoc_[1];
}
if (maxLoc) {
maxLoc->x = maxLoc_[0];
maxLoc->y = maxLoc_[1];
}
}
示例8: h
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
{
class LevelsInit
{
public:
Npp32s pLevels[256];
const Npp32s* pLevels3[3];
int nValues3[3];
#if (CUDA_VERSION > 4020)
GpuMat d_pLevels;
#endif
LevelsInit()
{
nValues3[0] = nValues3[1] = nValues3[2] = 256;
for (int i = 0; i < 256; ++i)
pLevels[i] = i;
#if (CUDA_VERSION <= 4020)
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
}
};
static LevelsInit lvls;
int cn = src.channels();
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
NppiSize sz;
sz.height = src.rows;
sz.width = src.cols;
Mat nppLut;
lut.convertTo(nppLut, CV_32S);
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
if (src.type() == CV_8UC1)
{
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), lvls.d_pLevels.ptr<Npp32s>(), 256) );
#endif
}
else
{
const Npp32s* pValues3[3];
Mat nppLut3[3];
if (nppLut.channels() == 1)
{
#if (CUDA_VERSION <= 4020)
pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
#endif
}
else
{
cv::split(nppLut, nppLut3);
#if (CUDA_VERSION <= 4020)
pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
#else
GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
pValues3[0] = d_nppLut0.ptr<Npp32s>();
pValues3[1] = d_nppLut1.ptr<Npp32s>();
pValues3[2] = d_nppLut2.ptr<Npp32s>();
#endif
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
示例9: void
void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& s)
{
CV_Assert(M.rows == 3 && M.cols == 3);
int interpolation = flags & INTER_MAX;
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
static const bool useNppTab[6][4][3] =
{
{
{false, false, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, false}
},
{
{false, false, false},
{false, false, false},
{false, false, false},
{false, false, false}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
},
{
{false, true, true},
{false, false, false},
{false, true, true},
{false, false, true}
}
};
bool useNpp = borderMode == BORDER_CONSTANT;
useNpp = useNpp && useNppTab[src.depth()][src.channels() - 1][interpolation];
#ifdef linux
// NPP bug on float data
useNpp = useNpp && src.depth() != CV_32F;
#endif
if (useNpp)
{
typedef void (*func_t)(const cv::gpu::GpuMat& src, cv::Size wholeSize, cv::Point ofs, cv::gpu::GpuMat& dst, double coeffs[][3], cv::Size dsize, int flags, cudaStream_t stream);
static const func_t funcs[2][6][4] =
{
{
{NppWarp<CV_8U, nppiWarpPerspective_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspective_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspective_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspective_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspective_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspective_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspective_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspective_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspective_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspective_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspective_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspective_32f_C4R>::call}
},
{
{NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C1R>::call, 0, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C3R>::call, NppWarp<CV_8U, nppiWarpPerspectiveBack_8u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C1R>::call, 0, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C3R>::call, NppWarp<CV_16U, nppiWarpPerspectiveBack_16u_C4R>::call},
{0, 0, 0, 0},
{NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C1R>::call, 0, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C3R>::call, NppWarp<CV_32S, nppiWarpPerspectiveBack_32s_C4R>::call},
{NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C1R>::call, 0, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C3R>::call, NppWarp<CV_32F, nppiWarpPerspectiveBack_32f_C4R>::call}
}
};
double coeffs[3][3];
Mat coeffsMat(3, 3, CV_64F, (void*)coeffs);
M.convertTo(coeffsMat, coeffsMat.type());
const func_t func = funcs[(flags & WARP_INVERSE_MAP) != 0][src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, wholeSize, ofs, dst, coeffs, dsize, interpolation, StreamAccessor::getStream(s));
}
else
{
using namespace cv::gpu::device::imgproc;
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
//.........这里部分代码省略.........
示例10: NppStatus
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
|| interpolation == INTER_CUBIC || interpolation == INTER_AREA);
CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
if (dsize == Size())
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
else
{
fx = static_cast<double>(dsize.width) / src.cols;
fy = static_cast<double>(dsize.height) / src.rows;
}
if (dsize != dst.size())
dst.create(dsize, src.type());
if (dsize == src.size())
{
if (s)
s.enqueueCopy(src, dst);
else
src.copyTo(dst);
return;
}
cudaStream_t stream = StreamAccessor::getStream(s);
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || (src.type() == CV_8UC4 && interpolation != INTER_AREA));
if (useNpp)
{
typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize,
double xFactor, double yFactor, int eInterpolation);
const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
NppiSize srcsz;
srcsz.width = wholeSize.width;
srcsz.height = wholeSize.height;
NppiRect srcrect;
srcrect.x = ofs.x;
srcrect.y = ofs.y;
srcrect.width = src.cols;
srcrect.height = src.rows;
NppiSize dstsz;
dstsz.width = dst.cols;
dstsz.height = dst.rows;
NppStreamHandler h(stream);
nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{
using namespace ::cv::gpu::device::imgproc;
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{resize_gpu<uchar> , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3> , resize_gpu<uchar4> },
{0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/ , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
{resize_gpu<ushort> , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3> , resize_gpu<ushort4> },
{resize_gpu<short> , 0 /*resize_gpu<short2>*/ , resize_gpu<short3> , resize_gpu<short4> },
{0 /*resize_gpu<int>*/ , 0 /*resize_gpu<int2>*/ , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
{resize_gpu<float> , 0 /*resize_gpu<float2>*/ , resize_gpu<float3> , resize_gpu<float4> }
};
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
}
}
示例11: createContinuous
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
{
#ifndef HAVE_CUFFT
(void) _src;
(void) _dst;
(void) dft_size;
(void) flags;
(void) stream;
throw_no_cuda();
#else
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
// We don't support unpacked output (in the case of real input)
CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );
const bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);
const bool is_row_dft = (flags & DFT_ROWS) != 0;
const bool is_scaled_dft = (flags & DFT_SCALE) != 0;
const bool is_inverse = (flags & DFT_INVERSE) != 0;
const bool is_complex_input = src.channels() == 2;
const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
// We don't support real-to-real transform
CV_Assert( is_complex_input || is_complex_output );
GpuMat src_cont = src;
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
createContinuous(src.rows, src.cols, src.type(), src_cont);
if (src_cont.data != src.data)
src.copyTo(src_cont, stream);
Size dft_size_opt = dft_size;
if (is_1d_input && !is_row_dft)
{
// If the source matrix is single column handle it as single row
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
}
CV_Assert( dft_size_opt.width > 1 );
cufftType dft_type = CUFFT_R2C;
if (is_complex_input)
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
cufftHandle plan;
if (is_1d_input || is_row_dft)
cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
else
cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
if (is_complex_input)
{
if (is_complex_output)
{
createContinuous(dft_size, CV_32FC2, _dst);
GpuMat dst = _dst.getGpuMat();
cufftSafeCall(cufftExecC2C(
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
}
else
{
createContinuous(dft_size, CV_32F, _dst);
GpuMat dst = _dst.getGpuMat();
cufftSafeCall(cufftExecC2R(
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
}
}
else
{
// We could swap dft_size for efficiency. Here we must reflect it
if (dft_size == dft_size_opt)
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
else
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);
GpuMat dst = _dst.getGpuMat();
cufftSafeCall(cufftExecR2C(
plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
}
cufftSafeCall( cufftDestroy(plan) );
if (is_scaled_dft)
cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
#endif
}
示例12: void
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
using namespace mathfunc::minmax;
typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);
static Caller multipass_callers[7] = {
minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0 };
static Caller singlepass_callers[7] = {
minMaxCaller<unsigned char>, minMaxCaller<char>,
minMaxCaller<unsigned short>, minMaxCaller<short>,
minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double> };
static MaskedCaller masked_multipass_callers[7] = {
minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0 };
static MaskedCaller masked_singlepass_callers[7] = {
minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
minMaxMaskCaller<int>, minMaxMaskCaller<float>,
minMaxMaskCaller<double> };
CV_Assert(src.channels() == 1);
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) &&
DeviceInfo().supports(NATIVE_DOUBLE)));
double minVal_; if (!minVal) minVal = &minVal_;
double maxVal_; if (!maxVal) maxVal = &maxVal_;
Size buf_size;
getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
if (mask.empty())
{
Caller* callers = multipass_callers;
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
callers = singlepass_callers;
Caller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
caller(src, minVal, maxVal, buf);
}
else
{
MaskedCaller* callers = masked_multipass_callers;
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
caller(src, mask, minVal, maxVal, buf);
}
}
示例13:
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
}
示例14: skinDetect
Mat visionUtils::skinDetect(Mat captureframe, Mat3b *skinDetectHSV, Mat *skinMask, std::vector<int> adaptiveHSV, int minPixelSize, int imgBlurPixels, int imgMorphPixels, int singleRegionChoice, bool displayFaces)
{
if (adaptiveHSV.size()!=6 || adaptiveHSV.empty())
{
adaptiveHSV.clear();
adaptiveHSV.push_back(5);
adaptiveHSV.push_back(38);
adaptiveHSV.push_back(51);
adaptiveHSV.push_back(17);
adaptiveHSV.push_back(250);
adaptiveHSV.push_back(242);
}
//int step = 0;
Mat3b frameTemp;
Mat3b frame;
// Forcing resize to 640x480 -> all thresholds / pixel filters configured for this size.....
// Note returned to original size at end...
Size s = captureframe.size();
cv::resize(captureframe,captureframe,Size(640,480));
if (useGPU)
{
GpuMat imgGPU, imgGPUHSV;
imgGPU.upload(captureframe);
cv::cvtColor(imgGPU, imgGPUHSV, CV_BGR2HSV);
GaussianBlur(imgGPUHSV, imgGPUHSV, Size(imgBlurPixels,imgBlurPixels), 1, 1);
imgGPUHSV.download(frameTemp);
}
else
{
cv::cvtColor(captureframe, frameTemp, CV_BGR2HSV);
GaussianBlur(frameTemp, frameTemp, Size(imgBlurPixels,imgBlurPixels), 1, 1);
}
// Potential FASTER VERSION using inRange
Mat frameThreshold = Mat::zeros(frameTemp.rows,frameTemp.cols, CV_8UC1);
Mat hsvMin = (Mat_<int>(1,3) << adaptiveHSV[0], adaptiveHSV[1],adaptiveHSV[2] );
Mat hsvMax = (Mat_<int>(1,3) << adaptiveHSV[3], adaptiveHSV[4],adaptiveHSV[5] );
inRange(frameTemp,hsvMin ,hsvMax, frameThreshold);
frameTemp.copyTo(frame,frameThreshold);
/* BGR CONVERSION AND THRESHOLD */
Mat1b frame_gray;
// send HSV to skinDetectHSV for return
*skinDetectHSV=frame.clone();
cv::cvtColor(frame, frame_gray, CV_BGR2GRAY);
// Adaptive thresholding technique
// 1. Threshold data to find main areas of skin
adaptiveThreshold(frame_gray,frame_gray,255,ADAPTIVE_THRESH_GAUSSIAN_C,THRESH_BINARY_INV,9,1);
if (useGPU)
{
GpuMat imgGPU;
imgGPU.upload(frame_gray);
// 2. Fill in thresholded areas
#if CV_MAJOR_VERSION == 2
gpu::morphologyEx(imgGPU, imgGPU, CV_MOP_CLOSE, Mat1b(imgMorphPixels,imgMorphPixels,1), Point(-1, -1), 2);
gpu::GaussianBlur(imgGPU, imgGPU, Size(imgBlurPixels,imgBlurPixels), 1, 1);
#elif CV_MAJOR_VERSION == 3
//TODO: Check if that's correct
Mat element = getStructuringElement(MORPH_RECT, Size(imgMorphPixels, imgMorphPixels), Point(-1, -1));
Ptr<cuda::Filter> closeFilter = cuda::createMorphologyFilter(MORPH_CLOSE, imgGPU.type(), element, Point(-1, -1), 2);
closeFilter->apply(imgGPU, imgGPU);
cv::Ptr<cv::cuda::Filter> gaussianFilter = cv::cuda::createGaussianFilter(imgGPU.type(), imgGPU.type(), Size(imgMorphPixels, imgMorphPixels), 1, 1);
gaussianFilter->apply(imgGPU, imgGPU);
#endif
imgGPU.download(frame_gray);
}
else
{
// 2. Fill in thresholded areas
morphologyEx(frame_gray, frame_gray, CV_MOP_CLOSE, Mat1b(imgMorphPixels,imgMorphPixels,1), Point(-1, -1), 2);
GaussianBlur(frame_gray, frame_gray, Size(imgBlurPixels,imgBlurPixels), 1, 1);
// Select single largest region from image, if singleRegionChoice is selected (1)
}
if (singleRegionChoice)
{
*skinMask = cannySegmentation(frame_gray, -1, displayFaces);
}
else // Detect each separate block and remove blobs smaller than a few pixels
{
*skinMask = cannySegmentation(frame_gray, minPixelSize, displayFaces);
}
// Just return skin
Mat frame_skin;
captureframe.copyTo(frame_skin,*skinMask); // Copy captureframe data to frame_skin, using mask from frame_ttt
//.........这里部分代码省略.........
示例15: void
void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& train,
GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
const GpuMat& mask, Stream& stream)
{
if (query.empty() || train.empty())
return;
using namespace cv::gpu::device::bf_knnmatch;
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
cudaStream_t stream);
static const caller_t callersL1[] =
{
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
matchL1_gpu<int>, matchL1_gpu<float>
};
static const caller_t callersL2[] =
{
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
};
static const caller_t callersHamming[] =
{
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
};
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.type() == query.type() && train.cols == query.cols);
CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING);
const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming;
const int nQuery = query.rows;
const int nTrain = train.rows;
if (k == 2)
{
ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx);
ensureSizeIsEnough(1, nQuery, CV_32FC2, distance);
}
else
{
ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx);
ensureSizeIsEnough(nQuery, k, CV_32F, distance);
ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist);
}
if (stream)
stream.enqueueMemSet(trainIdx, Scalar::all(-1));
else
trainIdx.setTo(Scalar::all(-1));
caller_t func = callers[query.depth()];
CV_Assert(func != 0);
func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
}