restore cudaSafeCall

This commit is contained in:
  parent 2bad639aee
  commit b50090f850
@@ -64,10 +64,12 @@ namespace cv { namespace gpu {
     }
 }}

-#if defined(__GNUC__)
-    #define cvCudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
-#else /* defined(__CUDACC__) || defined(__MSVC__) */
-    #define cvCudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+#ifndef cudaSafeCall
+    #if defined(__GNUC__)
+        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, __func__)
+    #else /* defined(__CUDACC__) || defined(__MSVC__) */
+        #define cudaSafeCall(expr)  cv::gpu::checkCudaError(expr, __FILE__, __LINE__, "")
+    #endif
 #endif

 namespace cv { namespace gpu
@@ -104,7 +106,7 @@ namespace cv { namespace gpu
     template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
     {
         cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
-        cvCudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+        cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
     }
 }
 }}
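
Note: both the old cvCudaSafeCall and the restored cudaSafeCall expand to the same cv::gpu::checkCudaError helper, so the rename changes only the macro's name; __func__ is only available under GCC, hence the empty-string fallback on other compilers. A minimal sketch of what such a helper typically looks like (illustrative only; the real checkCudaError is declared above this hunk and may differ):

    // Sketch, not the actual OpenCV implementation.
    static inline void checkCudaError(cudaError_t err, const char* file, int line, const char* func)
    {
        if (err != cudaSuccess)
            cv::error(cv::Exception(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line));
    }
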
@@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);

             transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -332,10 +332,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);

             transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<> struct TransformDispatcher<true>
@@ -358,10 +358,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);

             transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }

         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
@@ -383,10 +383,10 @@ namespace cv { namespace gpu { namespace cudev
             const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);

             transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     } // namespace transform_detail
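
Note: every launch site in these .cu files follows the same three-step checking idiom, which this rename leaves intact:

    // The recurring pattern (illustrative; myKernel and its arguments are placeholders):
    myKernel<<<grid, block, 0, stream>>>(src, dst);
    cudaSafeCall( cudaGetLastError() );          // catches launch-time errors (bad configuration, etc.)
    if (stream == 0)                             // synchronous (default-stream) path only
        cudaSafeCall( cudaDeviceSynchronize() ); // blocks and surfaces errors from kernel execution

With a user-supplied stream the synchronize is skipped, so execution errors surface at the next synchronizing call instead.
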
@@ -124,31 +124,31 @@ namespace cv { namespace gpu { namespace cudev

         void writeScalar(const uchar* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
         }
         void writeScalar(const schar* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
         }
         void writeScalar(const ushort* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
         }
         void writeScalar(const short* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
         }
         void writeScalar(const int* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
         }
         void writeScalar(const float* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
         }
         void writeScalar(const double* vals)
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
+            cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
         }

         template<typename T>
@@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
             dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

             set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall ( cudaDeviceSynchronize() );
+                cudaSafeCall ( cudaDeviceSynchronize() );
         }

         template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
@@ -209,10 +209,10 @@ namespace cv { namespace gpu { namespace cudev
             dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

             set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall ( cudaDeviceSynchronize() );
+                cudaSafeCall ( cudaDeviceSynchronize() );
         }

         template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
@@ -290,8 +290,8 @@ namespace cv { namespace gpu { namespace cudev
         template<typename T, typename D, typename S>
         void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
         {
-            cvCudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-            cvCudaSafeCall( cudaSetDoubleForDevice(&beta) );
+            cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+            cudaSafeCall( cudaSetDoubleForDevice(&beta) );
             Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
             cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
         }
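
Note: the writeScalar overloads above all upload a 4-element scalar into __constant__ device memory via cudaMemcpyToSymbol. Sketched end to end (the __constant__ declaration is assumed; it lives elsewhere in the file):

    __constant__ uchar scalar_8u[4];   // assumed declaration

    void writeScalar(const uchar* vals)
    {
        // copy four host values into the constant-memory symbol
        cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
    }
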
@@ -131,14 +131,14 @@ bool cv::gpu::Stream::queryIfComplete()
     if (err == cudaErrorNotReady || err == cudaSuccess)
         return err == cudaSuccess;

-    cvCudaSafeCall(err);
+    cudaSafeCall(err);
     return false;
 }

 void cv::gpu::Stream::waitForCompletion()
 {
     cudaStream_t stream = Impl::getStream(impl);
-    cvCudaSafeCall( cudaStreamSynchronize(stream) );
+    cudaSafeCall( cudaStreamSynchronize(stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
@@ -148,7 +148,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)

     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
@@ -157,7 +157,7 @@ void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)

     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
@@ -166,7 +166,7 @@ void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)

     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
@@ -175,7 +175,7 @@ void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)

     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
@@ -184,7 +184,7 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)

     cudaStream_t stream = Impl::getStream(impl);
     size_t bwidth = src.cols * src.elemSize();
-    cvCudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
+    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
 }

 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
@@ -201,7 +201,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)

     if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
     {
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }

@@ -212,7 +212,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
     if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
     {
         int ival = saturate_cast<uchar>(val[0]);
-        cvCudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
+        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
         return;
     }
 }
@@ -299,7 +299,7 @@ void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userDat

     cudaStream_t stream = Impl::getStream(impl);

-    cvCudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
+    cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
 #else
     (void) callback;
     (void) userData;
@@ -328,7 +328,7 @@ void cv::gpu::Stream::create()
     release();

     cudaStream_t stream;
-    cvCudaSafeCall( cudaStreamCreate( &stream ) );
+    cudaSafeCall( cudaStreamCreate( &stream ) );

     impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));

@@ -340,7 +340,7 @@ void cv::gpu::Stream::release()
 {
     if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
     {
-        cvCudaSafeCall( cudaStreamDestroy(impl->stream) );
+        cudaSafeCall( cudaStreamDestroy(impl->stream) );
         cv::fastFree(impl);
     }
 }
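
Note: queryIfComplete is the one call site here where a non-success status is expected: cudaErrorNotReady just means the stream still has pending work. Reconstructed as a sketch (the query call itself sits above the hunk and is assumed to be cudaStreamQuery):

    cudaError_t err = cudaStreamQuery(stream);   // assumed; not shown in the hunk
    if (err == cudaErrorNotReady || err == cudaSuccess)
        return err == cudaSuccess;               // pending work is not an error
    cudaSafeCall(err);                           // anything else is a genuine failure
    return false;
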
@@ -91,25 +91,25 @@ int cv::gpu::getCudaEnabledDeviceCount()
     if (error == cudaErrorNoDevice)
         return 0;

-    cvCudaSafeCall( error );
+    cudaSafeCall( error );
     return count;
 }

 void cv::gpu::setDevice(int device)
 {
-    cvCudaSafeCall( cudaSetDevice( device ) );
+    cudaSafeCall( cudaSetDevice( device ) );
 }

 int cv::gpu::getDevice()
 {
     int device;
-    cvCudaSafeCall( cudaGetDevice( &device ) );
+    cudaSafeCall( cudaGetDevice( &device ) );
     return device;
 }

 void cv::gpu::resetDevice()
 {
-    cvCudaSafeCall( cudaDeviceReset() );
+    cudaSafeCall( cudaDeviceReset() );
 }

 namespace
@@ -302,7 +302,7 @@ namespace
         if (!props_[devID])
         {
             props_[devID] = new cudaDeviceProp;
-            cvCudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
+            cudaSafeCall( cudaGetDeviceProperties(props_[devID], devID) );
         }

         return props_[devID];
@@ -322,7 +322,7 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory)
     if (prevDeviceID != device_id_)
         setDevice(device_id_);

-    cvCudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
+    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

     if (prevDeviceID != device_id_)
         setDevice(prevDeviceID);
@@ -408,8 +408,8 @@ void cv::gpu::printCudaDeviceInfo(int device)
     printf("Device count: %d\n", count);

     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

     const char *computeMode[] = {
         "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
@@ -423,7 +423,7 @@ void cv::gpu::printCudaDeviceInfo(int device)
     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

         printf("\nDevice %d: \"%s\"\n", dev, prop.name);
         printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
@@ -485,13 +485,13 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
     int end = valid ? device+1 : count;

     int driverVersion = 0, runtimeVersion = 0;
-    cvCudaSafeCall( cudaDriverGetVersion(&driverVersion) );
-    cvCudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
+    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
+    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

     for(int dev = beg; dev < end; ++dev)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

         const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
         printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
@@ -983,7 +983,7 @@ namespace

             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
@@ -998,7 +998,7 @@ namespace

             nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1040,7 +1040,7 @@ namespace

             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
@@ -1057,7 +1057,7 @@ namespace

             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1088,7 +1088,7 @@ namespace

             nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };
     template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
@@ -1105,7 +1105,7 @@ namespace

             nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1131,7 +1131,7 @@ namespace

             nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
         }
     };

@@ -1148,15 +1148,15 @@ namespace
     public:
         void copy(const Mat& src, GpuMat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
         }
         void copy(const GpuMat& src, Mat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
         }
         void copy(const GpuMat& src, GpuMat& dst) const
         {
-            cvCudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
+            cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
         }

         void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
@@ -1301,7 +1301,7 @@ namespace
     {
         if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
         {
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
             return;
         }

@@ -1312,7 +1312,7 @@ namespace
         if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
             int val = saturate_cast<uchar>(s[0]);
-            cvCudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
+            cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
             return;
         }
     }
@@ -1367,7 +1367,7 @@ namespace

     void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
     {
-        cvCudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
+        cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
     }

     void free(void* devPtr) const
@@ -191,18 +191,18 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return G

 void cv::gpu::registerPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
+    cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
 }

 void cv::gpu::unregisterPageLocked(Mat& m)
 {
-    cvCudaSafeCall( cudaHostUnregister(m.ptr()) );
+    cudaSafeCall( cudaHostUnregister(m.ptr()) );
 }

 bool cv::gpu::CudaMem::canMapHostMemory()
 {
     cudaDeviceProp prop;
-    cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+    cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
     return (prop.canMapHostMemory != 0) ? true : false;
 }

@@ -237,7 +237,7 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
     if (_alloc_type == ALLOC_ZEROCOPY)
     {
         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
         step = alignUpStep(step, prop.textureAlignment);
     }
     int64 _nettosize = (int64)step*rows;
@@ -252,9 +252,9 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)

     switch (alloc_type)
     {
-        case ALLOC_PAGE_LOCKED:    cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
-        case ALLOC_ZEROCOPY:       cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
-        case ALLOC_WRITE_COMBINED: cvCudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
+        case ALLOC_PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
+        case ALLOC_ZEROCOPY:       cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
+        case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
         default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
     }

@@ -273,7 +273,7 @@ GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
     GpuMat res;

     void *pdev;
-    cvCudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
+    cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
     res = GpuMat(rows, cols, type(), pdev, step);

     return res;
@@ -283,7 +283,7 @@ void cv::gpu::CudaMem::release()
 {
     if( refcount && CV_XADD(refcount, -1) == 1 )
     {
-        cvCudaSafeCall( cudaFreeHost(datastart ) );
+        cudaSafeCall( cudaFreeHost(datastart ) );
         fastFree(refcount);
     }
     data = datastart = dataend = 0;
|
||||
(void) device;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
cvCudaSafeCall( cudaGLSetGLDevice(device) );
|
||||
cudaSafeCall( cudaGLSetGLDevice(device) );
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
@ -184,7 +184,7 @@ namespace
|
||||
return;
|
||||
|
||||
cudaGraphicsResource_t resource;
|
||||
cvCudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
|
||||
cudaSafeCall( cudaGraphicsGLRegisterBuffer(&resource, buffer, cudaGraphicsMapFlagsNone) );
|
||||
|
||||
release();
|
||||
|
||||
@ -217,7 +217,7 @@ namespace
|
||||
CudaResource::GraphicsMapHolder::GraphicsMapHolder(cudaGraphicsResource_t* resource, cudaStream_t stream) : resource_(resource), stream_(stream)
|
||||
{
|
||||
if (resource_)
|
||||
cvCudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
|
||||
cudaSafeCall( cudaGraphicsMapResources(1, resource_, stream_) );
|
||||
}
|
||||
|
||||
CudaResource::GraphicsMapHolder::~GraphicsMapHolder()
|
||||
@ -240,14 +240,14 @@ namespace
|
||||
|
||||
void* dst;
|
||||
size_t size;
|
||||
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
|
||||
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&dst, &size, resource_) );
|
||||
|
||||
CV_DbgAssert( width * height == size );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
|
||||
cudaSafeCall( cudaMemcpy2D(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cvCudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
|
||||
cudaSafeCall( cudaMemcpy2DAsync(dst, width, src, spitch, width, height, cudaMemcpyDeviceToDevice, stream) );
|
||||
}
|
||||
|
||||
void CudaResource::copyTo(void* dst, size_t dpitch, size_t width, size_t height, cudaStream_t stream)
|
||||
@ -259,14 +259,14 @@ namespace
|
||||
|
||||
void* src;
|
||||
size_t size;
|
||||
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
|
||||
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&src, &size, resource_) );
|
||||
|
||||
CV_DbgAssert( width * height == size );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
|
||||
cudaSafeCall( cudaMemcpy2D(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice) );
|
||||
else
|
||||
cvCudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
|
||||
cudaSafeCall( cudaMemcpy2DAsync(dst, dpitch, src, width, width, height, cudaMemcpyDeviceToDevice, stream) );
|
||||
}
|
||||
|
||||
void* CudaResource::map(cudaStream_t stream)
|
||||
@ -277,7 +277,7 @@ namespace
|
||||
|
||||
void* ptr;
|
||||
size_t size;
|
||||
cvCudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
|
||||
cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource_) );
|
||||
|
||||
h.reset();
|
||||
|
||||
|
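
Note: the CudaResource code above wraps the standard CUDA-OpenGL interop sequence; GraphicsMapHolder maps in its constructor and unmaps in its destructor. Flattened out, the sequence looks like this (sketch; `resource` is assumed to have been registered with cudaGraphicsGLRegisterBuffer as in the first hunk):

    cudaSafeCall( cudaGraphicsMapResources(1, &resource, stream) );
    void* ptr;
    size_t size;
    cudaSafeCall( cudaGraphicsResourceGetMappedPointer(&ptr, &size, resource) );
    // ... use ptr as ordinary device memory (e.g. cudaMemcpy2D, as in copyFrom/copyTo) ...
    cudaSafeCall( cudaGraphicsUnmapResources(1, &resource, stream) );  // done by the holder's destructor
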
@@ -246,7 +246,7 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////
@@ -287,7 +287,7 @@ namespace
             (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 };
 }
@@ -402,7 +402,7 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
     }

     if (stream == 0)
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaDeviceSynchronize() );
 }

 ////////////////////////////////////////////////////////////////////////
@@ -427,7 +427,7 @@ namespace
         nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }
 }

@@ -403,7 +403,7 @@ public:

         unsigned int classified = 0;
         GpuMat dclassified(1, 1, CV_32S);
-        cvCudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );
+        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

         PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize);

@@ -448,11 +448,11 @@ public:
         if (groupThreshold <= 0 || objects.empty())
             return 0;

-        cvCudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
         cudev::lbp::connectedConmonents(candidates, classified, objects, groupThreshold, grouping_eps, dclassified.ptr<unsigned int>());

-        cvCudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
-        cvCudaSafeCall( cudaDeviceSynchronize() );
+        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
+        cudaSafeCall( cudaDeviceSynchronize() );
         return classified;
     }

@@ -481,7 +481,7 @@ private:
         roiSize.height = frame.height;

         cudaDeviceProp prop;
-        cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
+        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );

         Ncv32u bufSize;
         ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
nppSafeCall( nppiAlphaPremul_16u_AC4R(src.ptr<Npp16u>(), static_cast<int>(src.step), dst.ptr<Npp16u>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1942,7 +1942,7 @@ void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
|
||||
nppSafeCall( nppiSwapChannels_8u_C4IR(image.ptr<Npp8u>(), static_cast<int>(image.step), sz, dstOrder) );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream)
|
||||
|
@@ -60,7 +60,7 @@ namespace cv { namespace gpu { namespace cudev {

         void loadHueCSC(float hueCSC[9])
         {
-            cvCudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
+            cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
         }

         __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
@@ -190,10 +190,10 @@ namespace cv { namespace gpu { namespace cudev {
             NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
                                                    interopFrame.cols, interopFrame.rows);

-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );

             if (stream == 0)
-                cvCudaSafeCall( cudaDeviceSynchronize() );
+                cudaSafeCall( cudaDeviceSynchronize() );
         }
     }
 }}}
@@ -417,10 +417,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
@@ -478,10 +478,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -594,10 +594,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
@@ -653,10 +653,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -768,10 +768,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
@@ -827,10 +827,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -959,10 +959,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         calcDistanceUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, allDist);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
@@ -1022,10 +1022,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         calcDistance<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, allDist);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -1115,11 +1115,11 @@ namespace cv { namespace gpu { namespace cudev
         for (int i = 0; i < k; ++i)
         {
             findBestMatch<BLOCK_SIZE><<<grid, block, 0, stream>>>(allDist, i, trainIdx, distance);
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
         }

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     void findKnnMatchDispatcher(int k, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, cudaStream_t stream)
@@ -177,10 +177,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
@@ -236,10 +236,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -335,10 +335,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
@@ -392,10 +392,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -490,10 +490,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, train, mask, trainIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
@@ -546,10 +546,10 @@ namespace cv { namespace gpu { namespace cudev
         const size_t smemSize = (3 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);

         match<BLOCK_SIZE, Dist><<<grid, block, smemSize, stream>>>(query, trains, n, mask, trainIdx.data, imgIdx.data, distance.data);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -122,10 +122,10 @@ namespace cv { namespace gpu { namespace cudev

         matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
             trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
@@ -153,11 +153,11 @@ namespace cv { namespace gpu { namespace cudev
                 matchUnrolled<BLOCK_SIZE, MAX_DESC_LEN, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                     trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
             }
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
         }

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -230,10 +230,10 @@ namespace cv { namespace gpu { namespace cudev

         match<BLOCK_SIZE, false, Dist><<<grid, block, smemSize, stream>>>(query, 0, train, maxDistance, mask,
             trainIdx, PtrStepi(), distance, nMatches.data, trainIdx.cols);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template <int BLOCK_SIZE, typename Dist, typename T>
@@ -261,11 +261,11 @@ namespace cv { namespace gpu { namespace cudev
                 match<BLOCK_SIZE, true, Dist><<<grid, block, smemSize, stream>>>(query, i, train, maxDistance, WithOutMask(),
                     trainIdx, imgIdx, distance, nMatches.data, trainIdx.cols);
             }
-            cvCudaSafeCall( cudaGetLastError() );
+            cudaSafeCall( cudaGetLastError() );
         }

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////////////////////
@@ -62,15 +62,15 @@ namespace cv { namespace gpu { namespace cudev {
     void loadConstants(int width, int height, float minVal, float maxVal, int quantizationLevels, float backgroundPrior,
                        float decisionThreshold, int maxFeatures, int numInitializationFrames)
     {
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_width, &width, sizeof(width)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_height, &height, sizeof(height)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_minVal, &minVal, sizeof(minVal)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_maxVal, &maxVal, sizeof(maxVal)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_quantizationLevels, &quantizationLevels, sizeof(quantizationLevels)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_backgroundPrior, &backgroundPrior, sizeof(backgroundPrior)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_decisionThreshold, &decisionThreshold, sizeof(decisionThreshold)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_maxFeatures, &maxFeatures, sizeof(maxFeatures)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_numInitializationFrames, &numInitializationFrames, sizeof(numInitializationFrames)) );
     }

     __device__ float findFeature(const int color, const PtrStepi& colors, const PtrStepf& weights, const int x, const int y, const int nfeatures)
@@ -230,14 +230,14 @@ namespace cv { namespace gpu { namespace cudev {
         const dim3 block(32, 8);
         const dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

-        cvCudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );

         update<SrcT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);

-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template void update_gpu<uchar >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
@@ -180,16 +180,16 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

-        cvCudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );

         mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
                                                                      weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
                                                                      nmixtures, varThreshold, backgroundRatio);

-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////
@@ -333,16 +333,16 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

-        cvCudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );

         mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
                                                                   weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
                                                                   nmixtures, varThreshold, backgroundRatio, learningRate, minVar);

-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     ///////////////////////////////////////////////////////////////
@@ -406,13 +406,13 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

-        cvCudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );

         getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
@@ -445,15 +445,15 @@ namespace cv { namespace gpu { namespace cudev
         varMin = ::fminf(varMin, varMax);
         varMax = ::fmaxf(varMin, varMax);

-        cvCudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
-        cvCudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) );
+        cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) );
     }

     template <bool detectShadows, typename SrcT, typename WorkT>
@@ -665,7 +665,7 @@ namespace cv { namespace gpu { namespace cudev

         if (detectShadows)
         {
-            cvCudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
+            cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );

             mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                 weight, variance, (PtrStepSz<WorkT>) mean,
@@ -673,17 +673,17 @@ namespace cv { namespace gpu { namespace cudev
         }
         else
         {
-            cvCudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
+            cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );

             mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
                                                                  weight, variance, (PtrStepSz<WorkT>) mean,
                                                                  alphaT, alpha1, prune);
         }

-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
@@ -737,13 +737,13 @@ namespace cv { namespace gpu { namespace cudev
         dim3 block(32, 8);
         dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));

-        cvCudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );

         getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
@@ -135,12 +135,12 @@ namespace cv { namespace gpu { namespace cudev
         float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
         float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);

-        cvCudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
+        cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
         bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
-        cvCudaSafeCall ( cudaGetLastError () );
+        cudaSafeCall ( cudaGetLastError () );

         if (stream == 0)
-            cvCudaSafeCall( cudaDeviceSynchronize() );
+            cudaSafeCall( cudaDeviceSynchronize() );
     }

     template<typename T>
@@ -73,10 +73,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));

         blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall(cudaDeviceSynchronize());
+            cudaSafeCall(cudaDeviceSynchronize());
     }

     template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
@@ -109,10 +109,10 @@ namespace cv { namespace gpu { namespace cudev
         dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

         blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
-        cvCudaSafeCall( cudaGetLastError() );
+        cudaSafeCall( cudaGetLastError() );

         if (stream == 0)
-            cvCudaSafeCall(cudaDeviceSynchronize());
+            cudaSafeCall(cudaDeviceSynchronize());
     }
 } // namespace blend
 }}} // namespace cv { namespace gpu { namespace cudev
@ -75,10 +75,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float* transl, PtrStepSz<float3> dst,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace transform_points
|
||||
@ -114,12 +114,12 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float* transl, const float* proj, PtrStepSz<float2> dst,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
||||
cvCudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
||||
cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace project_points
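The transform_points and project_points callers above stage their small per-call parameters (rotation rows, translation, projection) in __constant__ memory before launching; cudaMemcpyToSymbol is the host-side write for a __constant__ symbol, and every thread then reads the values through the broadcast-friendly constant cache. A hedged sketch of the idiom with a hypothetical one-row symbol (cudaSafeCall as sketched earlier):

    #include <cuda_runtime.h>

    __constant__ float crow[3];   // hypothetical constant-memory payload

    __global__ void consume(float3* out, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n)
            out[i] = make_float3(crow[0], crow[1], crow[2]);  // broadcast read, cached
    }

    void upload(const float* hostRow)
    {
        // copy three floats from host memory into the device symbol
        cudaSafeCall( cudaMemcpyToSymbol(crow, hostRow, 3 * sizeof(float)) );
    }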
@ -174,17 +174,17 @@ namespace cv { namespace gpu { namespace cudev
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores)
{
cvCudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
cvCudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));

dim3 threads(256);
dim3 grid(num_hypotheses);

computeHypothesisScoresKernel<256><<<grid, threads>>>(
num_points, object, image, dist_threshold, hypothesis_scores);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace solvepnp_ransac
}}} // namespace cv { namespace gpu { namespace cudev

@ -141,9 +141,9 @@ namespace canny
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}

void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)

@ -227,9 +227,9 @@ namespace canny
bindTexture(&tex_mag, mag);

calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -324,17 +324,17 @@ namespace canny
void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
{
void* counter_ptr;
cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );

cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

const dim3 block(16, 16);
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));

edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -435,24 +435,24 @@ namespace canny
void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
{
void* counter_ptr;
cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );

int count;
cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

while (count > 0)
{
cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

const dim3 block(128);
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);

edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

std::swap(st1, st2);
}
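The hysteresis hunks above also show the device-counter idiom used throughout: cudaGetSymbolAddress locates a __device__ int, cudaMemset zeroes it before each pass, the kernel bumps it with atomics, and cudaMemcpy brings the tally back to decide whether another pass is needed. (The cudaThreadSynchronize() in the calcMagnitude hunk is the older, since-deprecated spelling of cudaDeviceSynchronize().) A condensed sketch of that control flow; pushWork and the item array are stand-ins, not the canny kernels:

    #include <cuda_runtime.h>

    __device__ int d_counter;

    __global__ void pushWork(int* items, int n)
    {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n && items[i] > 0)
        {
            --items[i];                  // consume one unit of work
            atomicAdd(&d_counter, 1);    // report that this pass did something
        }
    }

    void drain(int* items, int n)
    {
        void* counter_ptr;
        cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, d_counter) );

        int count = 1;
        while (count > 0)
        {
            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

            pushWork<<<(n + 255) / 256, 256>>>(items, n);
            cudaSafeCall( cudaGetLastError() );
            cudaSafeCall( cudaDeviceSynchronize() );

            // zero means the pass produced no new work, so the loop ends
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
        }
    }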
@ -215,9 +215,9 @@ namespace cv { namespace gpu { namespace cudev
Int_t inInt(lo, hi);
computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const PtrStepSz<T> >(image), edges, inInt);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void computeEdges<uchar> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);

@ -503,7 +503,7 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));

lableTiles<<<grid, block, 0, stream>>>(edges, comps);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS;
while (grid.x > 1 || grid.y > 1)

@ -517,16 +517,16 @@ namespace cv { namespace gpu { namespace cudev
tileSizeY <<= 1;
grid = mergeGrid;

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

grid.x = divUp(edges.cols, block.x);
grid.y = divUp(edges.rows, block.y);
flatten<<<grid, block, 0, stream>>>(edges, comps);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
} } }

@ -128,10 +128,10 @@ namespace clahe

calcLutKernel<<<grid, block, 0, stream>>>(src, lut, tileSize, tilesX, clipLimit, lutScale);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void tranformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY)

@ -173,13 +173,13 @@ namespace clahe
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) );

tranformKernel<<<grid, block, 0, stream>>>(src, dst, lut, tileSize, tilesX, tilesY);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -169,10 +169,10 @@ namespace column_filter

linearColumnFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -363,9 +363,9 @@ namespace filter
};

if (stream == 0)
cvCudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
cudaSafeCall( cudaMemcpyToSymbol(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cvCudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(column_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );

callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
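The filter dispatcher above is one place where the copy itself is stream-aware: with no stream it uses the blocking cudaMemcpyToSymbol, otherwise the stream-ordered cudaMemcpyToSymbolAsync, whose completion is tied to that stream rather than to the call returning. A sketch of the same branch in isolation; c_coeffs is a hypothetical symbol, and the source pointer is device memory, hence cudaMemcpyDeviceToDevice as in the hunk:

    #include <cuda_runtime.h>

    __constant__ float c_coeffs[32];   // hypothetical filter-tap symbol

    void uploadTaps(const float* devKernel, int ksize, cudaStream_t stream)
    {
        if (stream == 0)
            // default-stream ordered: later default-stream launches see the new taps
            cudaSafeCall( cudaMemcpyToSymbol(c_coeffs, devKernel,
                                             ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
        else
            // ordered on 'stream': only launches queued on that stream are guaranteed
            // to observe the updated taps
            cudaSafeCall( cudaMemcpyToSymbolAsync(c_coeffs, devKernel,
                                                  ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
    }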
@ -70,10 +70,10 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< PtrStep<T>, B<T> > brdSrc(src, brd);

copyMakeBorder<<<grid, block, 0, stream>>>(brdSrc, dst, top, left);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -347,13 +347,13 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );

Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <int cn>

@ -364,13 +364,13 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );

Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);

@ -530,10 +530,10 @@ namespace cv { namespace gpu { namespace cudev
bindTexture(&sourceTex, src);

MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);

@ -61,16 +61,16 @@ namespace cv { namespace gpu { namespace cudev

void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
cvCudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
cvCudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
size_t table_space_step = table_space.step / sizeof(float);
cvCudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
}
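One detail of disp_load_constants worth flagging: for the large lookup tables it copies the pointer, not the table — note the &table_color and sizeof(table_color) — so only the address lives in constant memory and the kernel dereferences global memory through it, while the scalar parameters are copied by value. A sketch of the two cases side by side, with hypothetical names:

    #include <cuda_runtime.h>

    __constant__ int    c_ndisp;    // small scalar: the value itself lives in constant memory
    __constant__ float* c_table;    // large table: only its device address lives there

    void loadConstants(int ndisp, float* d_table)
    {
        // copy the integer value
        cudaSafeCall( cudaMemcpyToSymbol(c_ndisp, &ndisp, sizeof(int)) );

        // copy the pointer value; the table stays in global memory
        cudaSafeCall( cudaMemcpyToSymbol(c_table, &d_table, sizeof(d_table)) );
    }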
template <int channels>

@ -191,20 +191,20 @@ namespace cv { namespace gpu { namespace cudev
for (int i = 0; i < iters; ++i)
{
disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
break;
case 3:
for (int i = 0; i < iters; ++i)
{
disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
break;
default:

@ -212,7 +212,7 @@ namespace cv { namespace gpu { namespace cudev
}

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);

@ -282,7 +282,7 @@ namespace cv { namespace gpu { namespace cudev
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
{
void* counter_ptr;
cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

dim3 block(32, 8);

@ -290,7 +290,7 @@ namespace cv { namespace gpu { namespace cudev
grid.x = divUp(img.cols - 6, block.x);
grid.y = divUp(img.rows - 6, block.y);

cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );

if (score.data)
{

@ -307,12 +307,12 @@ namespace cv { namespace gpu { namespace cudev
calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

unsigned int count;
cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

return count;
}

@ -359,22 +359,22 @@ namespace cv { namespace gpu { namespace cudev
int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
{
void* counter_ptr;
cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

dim3 block(256);

dim3 grid;
grid.x = divUp(count, block.x);

cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );

nonmaxSupression<<<grid, block>>>(kpLoc, count, score, loc, response);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

unsigned int new_count;
cvCudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );

return new_count;
}
@ -205,13 +205,13 @@ namespace bgfg

calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
(PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, bool cc20, cudaStream_t stream);

@ -251,10 +251,10 @@ namespace bgfg
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, bestThres, changeMask);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void calcDiffThreshMask_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);

@ -269,7 +269,7 @@ namespace bgfg

void setBGPixelStat(const BGPixelStat& stat)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) );
cudaSafeCall( cudaMemcpyToSymbol(c_stat, &stat, sizeof(BGPixelStat)) );
}

template <typename T> struct Output;

@ -374,15 +374,15 @@ namespace bgfg
dim3 block(32, 8);
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );

bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame,
Ftd, Fbd, foreground,
deltaC, deltaCC, alpha2, N1c, N1cc);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);

@ -765,17 +765,17 @@ namespace bgfg
dim3 block(32, 8);
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );

updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>(
prevFrame.cols, prevFrame.rows,
prevFrame, curFrame,
Ftd, Fbd, foreground, background,
deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -94,9 +94,9 @@ namespace cv { namespace gpu { namespace cudev
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
{
void* counter_ptr;
cvCudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

cvCudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );

bindTexture(&eigTex, eig);

@ -108,12 +108,12 @@ namespace cv { namespace gpu { namespace cudev
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

int count;
cvCudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );

return std::min(count, max_count);
}

@ -98,8 +98,8 @@ void calcWobbleSuppressionMaps(
int left, int idx, int right, int width, int height,
const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy)
{
cvCudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));

dim3 threads(32, 8);
dim3 grid(divUp(width, threads.x), divUp(height, threads.y));

@ -107,8 +107,8 @@ void calcWobbleSuppressionMaps(
calcWobbleSuppressionMapsKernel<<<grid, threads>>>(
left, idx, right, width, height, mapx, mapy);

cvCudaSafeCall(cudaGetLastError());
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaDeviceSynchronize());
}

}}}}

@ -100,10 +100,10 @@ namespace hist
const dim3 grid(divUp(src.rows, block.y));

histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -140,9 +140,9 @@ namespace hist
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
{
if (stream == 0)
cvCudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
else
cvCudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );

const float scale = 255.0f / (src.cols * src.rows);

@ -90,23 +90,23 @@ namespace cv { namespace gpu { namespace cudev
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y)
{
cvCudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );

int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
cvCudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );

int block_hist_size_2up = power_2up(block_hist_size);
cvCudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );

int descr_width = nblocks_win_x * block_hist_size;
cvCudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
cudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );

int descr_size = descr_width * nblocks_win_y;
cvCudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
}

@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
dim3 threads(32, 2, nblocks);

cvCudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
cudaFuncCachePreferL1));

// Precompute gaussian spatial window parameter

@ -217,9 +217,9 @@ namespace cv { namespace gpu { namespace cudev
int smem = hists_size + final_hists_size;
compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
img_block_width, grad, qangle, scale, block_hists);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -318,9 +318,9 @@ namespace cv { namespace gpu { namespace cudev
else
CV_Error(cv::Error::StsBadArg, "normalize_hists: histogram's size is too big, try to decrease number of bins");

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -378,7 +378,7 @@ namespace cv { namespace gpu { namespace cudev
dim3 threads(nthreads, 1, nblocks);
dim3 grid(divUp(img_win_width, nblocks), img_win_height);

cvCudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
cudaFuncCachePreferL1));

int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /

@ -386,7 +386,7 @@ namespace cv { namespace gpu { namespace cudev
compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, confidences);
cvCudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaThreadSynchronize());
}

@ -440,15 +440,15 @@ namespace cv { namespace gpu { namespace cudev
dim3 threads(nthreads, 1, nblocks);
dim3 grid(divUp(img_win_width, nblocks), img_win_height);

cvCudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));

int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, labels);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//----------------------------------------------------------------------------

@ -491,9 +491,9 @@ namespace cv { namespace gpu { namespace cudev
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -540,9 +540,9 @@ namespace cv { namespace gpu { namespace cudev
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//----------------------------------------------------------------------------

@ -666,9 +666,9 @@ namespace cv { namespace gpu { namespace cudev
else
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <int nthreads, int correct_gamma>

@ -739,9 +739,9 @@ namespace cv { namespace gpu { namespace cudev
else
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -782,13 +782,13 @@ namespace cv { namespace gpu { namespace cudev
int colOfs = 0;

cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cvCudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );

if (texOfs != 0)
{
colOfs = static_cast<int>( texOfs/sizeof(T) );
cvCudaSafeCall( cudaUnbindTexture(tex) );
cvCudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
cudaSafeCall( cudaUnbindTexture(tex) );
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
}

dim3 threads(32, 8);

@ -798,11 +798,11 @@ namespace cv { namespace gpu { namespace cudev
float sy = static_cast<float>(src.rows) / dst.rows;

resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (PtrStepSz<T>)dst, colOfs);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

cvCudaSafeCall( cudaUnbindTexture(tex) );
cudaSafeCall( cudaUnbindTexture(tex) );
}

void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
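resize_for_hog above also preserves the legacy texture-reference dance: bind the pitched image as a 2D texture and, if the runtime reports a non-zero byte offset for the binding, fold it into a column offset and rebind so kernel fetches can compensate. A sketch of that bind/offset/unbind sequence against the deprecated texture API; tex, the raw-pointer parameters, and the uchar payload are placeholders:

    #include <cuda_runtime.h>

    texture<unsigned char, 2, cudaReadModeElementType> tex;   // legacy file-scope reference

    void bindWithOffset(const unsigned char* data, int cols, int rows, size_t step, int& colOfs)
    {
        size_t texOfs = 0;
        colOfs = 0;

        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
        cudaSafeCall( cudaBindTexture2D(&texOfs, tex, data, desc, cols, rows, step) );

        if (texOfs != 0)
        {
            // the driver applied a byte offset; express it in elements and rebind
            colOfs = static_cast<int>(texOfs / sizeof(unsigned char));
            cudaSafeCall( cudaUnbindTexture(tex) );
            cudaSafeCall( cudaBindTexture2D(&texOfs, tex, data, desc, cols, rows, step) );
        }
    }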
|
||||
|
@ -122,22 +122,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const int PIXELS_PER_THREAD = 16;
|
||||
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 4);
|
||||
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
|
||||
|
||||
buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
return totalCount;
|
||||
}
|
||||
@ -225,9 +225,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
else
|
||||
linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
|
||||
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -264,22 +264,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
|
||||
|
||||
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
@ -462,9 +462,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
|
||||
@ -476,12 +476,12 @@ namespace cv { namespace gpu { namespace cudev
|
||||
rho, theta,
|
||||
lineGap, lineLength,
|
||||
mask.rows, mask.cols);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
@ -548,12 +548,12 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const dim3 block(256);
|
||||
const dim3 grid(divUp(count, block.x));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
|
||||
|
||||
circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -586,22 +586,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
|
||||
|
||||
buildCentersList<<<grid, block>>>(accum, centers, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
return totalCount;
|
||||
}
|
||||
@ -662,9 +662,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(has20 ? 1024 : 512);
|
||||
const dim3 grid(centersCount);
|
||||
@ -673,12 +673,12 @@ namespace cv { namespace gpu { namespace cudev
|
||||
size_t smemSize = (histSize + 2) * sizeof(int);
|
||||
|
||||
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxCircles);
|
||||
|
||||
@ -768,22 +768,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const int PIXELS_PER_THREAD = 8;
|
||||
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 4);
|
||||
const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
|
||||
|
||||
buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
return totalCount;
|
||||
}
|
||||
@ -824,9 +824,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float thetaScale = levels / (2.0f * CV_PI_F);
|
||||
|
||||
buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
@ -877,9 +877,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float thetaScale = levels / (2.0f * CV_PI_F);
|
||||
|
||||
GHT_Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold)
|
||||
@ -911,22 +911,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
|
||||
|
||||
GHT_Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
@ -989,9 +989,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
hist, rows, cols,
|
||||
minScale, scaleStep, scaleRange,
|
||||
idp, thetaScale);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
|
||||
@ -1037,22 +1037,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
float minScale, float scaleStep, float dp, int threshold)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
|
||||
|
||||
GHT_Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
@ -1123,9 +1123,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
hist, rows, cols,
|
||||
minAngle, angleStep, angleRange,
|
||||
idp, thetaScale);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
|
||||
@ -1171,22 +1171,22 @@ namespace cv { namespace gpu { namespace cudev
|
||||
float minAngle, float angleStep, float dp, int threshold)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
|
||||
|
||||
GHT_Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
@ -1242,7 +1242,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
tbl.r2_data = r2.data;
|
||||
tbl.r2_step = r2.step;
|
||||
|
||||
cvCudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
|
||||
}
|
||||
void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
|
||||
{
|
||||
@ -1266,7 +1266,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
tbl.r2_data = r2.data;
|
||||
tbl.r2_step = r2.step;
|
||||
|
||||
cvCudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
|
||||
}
|
||||
|
||||
struct TemplFeatureTable
|
||||
@ -1419,9 +1419,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
sizes, maxSize,
|
||||
xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
|
||||
center, maxDist);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
thrust::device_ptr<int> sizesPtr(sizes);
|
||||
thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
|
||||
@ -1501,9 +1501,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
GHT_Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
|
||||
minAngle, maxAngle, 1.0f / angleStep, angleRange);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
|
||||
@ -1566,9 +1566,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
GHT_Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
|
||||
angle, angleEpsilon,
|
||||
minScale, maxScale, iScaleStep, scaleRange);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
|
||||
@ -1636,14 +1636,14 @@ namespace cv { namespace gpu { namespace cudev
|
||||
const float sinVal = ::sinf(angle);
|
||||
const float cosVal = ::cosf(angle);
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
|
||||
|
||||
GHT_Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
|
||||
angle, sinVal, cosVal, angleEpsilon, scale,
|
||||
1.0f / dp);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
__global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
|
||||
@ -1679,24 +1679,24 @@ namespace cv { namespace gpu { namespace cudev
|
||||
float dp, int threshold)
|
||||
{
|
||||
void* counterPtr;
|
||||
cvCudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||
|
||||
cvCudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
|
||||
cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
|
||||
|
||||
cvCudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
|
||||
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
|
||||
|
||||
GHT_Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
|
||||
angle, angleVotes, scale, scaleVotes,
|
||||
dp, threshold);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
int totalCount;
|
||||
cvCudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
|
||||
totalCount = ::min(totalCount, maxSize);
|
||||
|
||||
|
@ -154,13 +154,13 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
||||
cvCudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||
|
||||
meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||
}
|
||||
@ -173,13 +173,13 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
||||
cvCudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
|
||||
|
||||
meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||
}
|
||||
@ -295,10 +295,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
|
||||
@ -309,10 +309,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||

/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////

@ -351,13 +351,13 @@ namespace cv { namespace gpu { namespace cudev
dim3 block(32, 8);
dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));

cvCudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );

reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
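The cq symbol above holds the 4x4 reprojection matrix Q flattened to 16 floats. A sketch of the declaration/upload pair this relies on (illustrative; uploadQ is a hypothetical name):

    // Device-side constant memory for the reprojection matrix.
    __constant__ float cq[16];

    void uploadQ(const float* q)   // q: 16 host floats, row-major Q
    {
        cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
    }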
@ -464,10 +464,10 @@ namespace cv { namespace gpu { namespace cudev
break;
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

/////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////

@ -576,10 +576,10 @@ namespace cv { namespace gpu { namespace cudev
break;
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

////////////////////////////// Column Sum //////////////////////////////////////
@ -611,9 +611,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(src.cols, threads.x));

column_sumKernel_32F<<<grid, threads>>>(src.cols, src.rows, src, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
@ -638,10 +638,10 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

mulSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, c);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -666,10 +666,10 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

mulSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, c);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
@ -695,10 +695,10 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

mulAndScaleSpectrumsKernel<<<grid, threads, 0, stream>>>(a, b, scale, c);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -724,10 +724,10 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));

mulAndScaleSpectrumsKernel_CONJ<<<grid, threads, 0, stream>>>(a, b, scale, c);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
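For context, an illustrative body for the multiply-and-scale kernels the four hunks above launch (a sketch, not the file's actual kernel; assumes cufftComplex operands from cufft.h addressed through the PtrStepSz helpers):

    __global__ void mulAndScaleSketch(PtrStepSz<cufftComplex> a, PtrStep<cufftComplex> b,
                                      float scale, PtrStep<cufftComplex> c)
    {
        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y * blockDim.y + threadIdx.y;

        if (x < a.cols && y < a.rows)
        {
            cufftComplex av = a(y, x), bv = b(y, x), v;
            v.x = (av.x * bv.x - av.y * bv.y) * scale;   // real part of a*b
            v.y = (av.x * bv.y + av.y * bv.x) * scale;   // imaginary part
            c(y, x) = v;
        }
    }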

//////////////////////////////////////////////////////////////////////////

@ -837,10 +837,10 @@ namespace cv { namespace gpu { namespace cudev
const float k_rinv[9], const float r_kinv[9], const float t[3],
float scale, cudaStream_t stream)
{
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

int cols = map_x.cols;
int rows = map_x.rows;
@ -849,9 +849,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

buildWarpMapsKernel<PlaneMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -859,9 +859,9 @@ namespace cv { namespace gpu { namespace cudev
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream)
{
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

int cols = map_x.cols;
int rows = map_x.rows;
@ -870,9 +870,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

buildWarpMapsKernel<CylindricalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -880,9 +880,9 @@ namespace cv { namespace gpu { namespace cudev
const float k_rinv[9], const float r_kinv[9], float scale,
cudaStream_t stream)
{
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cvCudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float)));
cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float)));

int cols = map_x.cols;
int rows = map_x.rows;
@ -891,9 +891,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

buildWarpMapsKernel<SphericalMapper><<<grid,threads>>>(tl_u, tl_v, cols, rows, map_x, map_y);
cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());
if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

//////////////////////////////////////////////////////////////////////////
@ -955,9 +955,9 @@ namespace cv { namespace gpu { namespace cudev
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
cvCudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaGetLastError() ); \
if (stream == 0) \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};

@ -988,9 +988,9 @@ namespace cv { namespace gpu { namespace cudev
};

if (stream == 0)
cvCudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cvCudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );

funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
}
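The second hunk above shows the stream-aware constant upload: a blocking cudaMemcpyToSymbol on the default stream, the Async variant queued on a user stream. A self-contained sketch of the same pattern (uploadKernel and c_exampleKernel are illustrative names):

    __constant__ float c_exampleKernel[256];   // assumes count <= 256

    void uploadKernel(const float* kernel, int count, cudaStream_t stream)
    {
        // kernel points to device memory, hence cudaMemcpyDeviceToDevice.
        if (stream == 0)
            cudaSafeCall( cudaMemcpyToSymbol(c_exampleKernel, kernel,
                              count * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
        else
            cudaSafeCall( cudaMemcpyToSymbolAsync(c_exampleKernel, kernel,
                              count * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
    }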
@ -367,10 +367,10 @@ namespace cv { namespace gpu { namespace cudev
// launch 1 block / row
const int grid = img.rows;

cvCudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );

shfl_integral_horizontal<<<grid, block, 0, stream>>>((const PtrStepSz<uint4>) img, (PtrStepSz<uint4>) integral);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

{
@ -378,11 +378,11 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(integral.cols, block.x), 1);

shfl_integral_vertical<<<grid, block, 0, stream>>>(integral);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void shfl_integral_vertical(PtrStepSz<unsigned int> buffer, PtrStepSz<unsigned int> integral)
@ -452,10 +452,10 @@ namespace cv { namespace gpu { namespace cudev
const int block = blockStep;
const int grid = img.rows;

cvCudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );

shfl_integral_horizontal<<<grid, block, 0, stream>>>((PtrStepSz<uint4>) img, buffer);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

{
@ -463,7 +463,7 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(integral.cols, block.x), 1);

shfl_integral_vertical<<<grid, block, 0, stream>>>((PtrStepSz<uint>)buffer, integral);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
}
}
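The shfl_integral_* kernels above compute the integral image with warp-shuffle prefix sums (sm_30+). An illustrative sketch of the underlying warp scan, not the file's actual kernel (uses the legacy __shfl_up intrinsic of this era, later superseded by __shfl_up_sync):

    // Inclusive prefix sum across a 32-lane warp; lane i ends up with the
    // sum of lanes 0..i.
    __device__ int warpInclusiveScan(int v)
    {
        const int lane = threadIdx.x & 31;
        for (int offset = 1; offset < 32; offset *= 2)
        {
            const int n = __shfl_up(v, offset);  // fetch value from lane - offset
            if (lane >= offset)
                v += n;
        }
        return v;
    }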
@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace cudev
int block = ncandidates;
int smem = block * ( sizeof(int) + sizeof(int4) );
disjoin<InSameComponint><<<1, block, smem>>>(candidates, objects, ncandidates, groupThreshold, grouping_eps, nclasses);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

struct Cascade
@ -114,10 +114,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplateNaiveKernel_CCORR<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
@ -184,10 +184,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplateNaiveKernel_SQDIFF<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
@ -240,10 +240,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplatePreparedKernel_SQDIFF_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
@ -312,10 +312,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplatePreparedKernel_SQDIFF_NORMED_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
@ -355,10 +355,10 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));

matchTemplatePreparedKernel_CCOFF_8U<<<grid, threads, 0, stream>>>(w, h, (float)templ_sum / (w * h), image_sum, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -399,10 +399,10 @@ namespace cv { namespace gpu { namespace cudev
matchTemplatePreparedKernel_CCOFF_8UC2<<<grid, threads, 0, stream>>>(
w, h, (float)templ_sum_r / (w * h), (float)templ_sum_g / (w * h),
image_sum_r, image_sum_g, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
@ -457,10 +457,10 @@ namespace cv { namespace gpu { namespace cudev
(float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h),
image_sum_r, image_sum_g, image_sum_b, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -525,10 +525,10 @@ namespace cv { namespace gpu { namespace cudev
(float)templ_sum_a / (w * h),
image_sum_r, image_sum_g, image_sum_b, image_sum_a,
result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//////////////////////////////////////////////////////////////////////
@ -574,10 +574,10 @@ namespace cv { namespace gpu { namespace cudev
matchTemplatePreparedKernel_CCOFF_NORMED_8U<<<grid, threads, 0, stream>>>(
w, h, weight, templ_sum_scale, templ_sqsum_scale,
image_sum, image_sqsum, result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -640,10 +640,10 @@ namespace cv { namespace gpu { namespace cudev
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
@ -720,10 +720,10 @@ namespace cv { namespace gpu { namespace cudev
image_sum_g, image_sqsum_g,
image_sum_b, image_sqsum_b,
result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -812,10 +812,10 @@ namespace cv { namespace gpu { namespace cudev
image_sum_b, image_sqsum_b,
image_sum_a, image_sqsum_a,
result);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//////////////////////////////////////////////////////////////////////
@ -860,10 +860,10 @@ namespace cv { namespace gpu { namespace cudev
break;
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//////////////////////////////////////////////////////////////////////
@ -904,10 +904,10 @@ namespace cv { namespace gpu { namespace cudev
extractFirstChannel_32F<4><<<grid, threads, 0, stream>>>(image, result);
break;
}
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
} //namespace match_template
}}} // namespace cv { namespace gpu { namespace cudev
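For context, the prepared CCOFF_NORMED kernels above evaluate the standard normalized correlation coefficient from integral sums and square sums; in the usual matchTemplate notation (a sketch, with zero-mean template T' and window I'):

    R(x,y) = \frac{\sum_{x',y'} T'(x',y') \, I'(x+x', y+y')}
                  {\sqrt{\sum_{x',y'} T'(x',y')^2 \cdot \sum_{x',y'} I'(x+x', y+y')^2}}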
@ -145,10 +145,10 @@ namespace cv { namespace gpu { namespace cudev
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
@ -194,10 +194,10 @@ namespace cv { namespace gpu { namespace cudev

polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
@ -432,12 +432,12 @@ namespace sum
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight);
else
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

R result[4] = {0, 0, 0, 0};
cvCudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );

out[0] = result[0];
out[1] = result[1];
@ -761,13 +761,13 @@ namespace minMax
else
kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

R minval_, maxval_;
cvCudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
*minval = minval_;
*maxval = maxval_;
}
@ -934,22 +934,22 @@ namespace minMaxLoc
else
kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

T minval_, maxval_;
cvCudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
*minval = minval_;
*maxval = maxval_;

unsigned int minloc_, maxloc_;
cvCudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
}
@ -1065,15 +1065,15 @@ namespace countNonZero

unsigned int* count_buf = buf.ptr(0);

cvCudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );
cudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );

kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, count_buf, twidth, theight);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

unsigned int count;
cvCudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));
cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));

return count;
}
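The minMaxLoc path above stores positions as flat element indices and unflattens them on the host in the final two assignment lines. An illustrative round-trip (Loc and unflatten are hypothetical helpers):

    struct Loc { int x, y; };

    Loc unflatten(unsigned int idx, unsigned int cols)
    {
        // Inverse of idx = y * cols + x.
        Loc p;
        p.y = static_cast<int>(idx / cols);
        p.x = static_cast<int>(idx % cols);
        return p;
    }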
@ -1236,10 +1236,10 @@ namespace reduce

Op op;
rowsKernel<T, S, D, Op><<<grid, block, 0, stream>>>(src, dst, op);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T, typename S, typename D>
@ -1316,10 +1316,10 @@ namespace reduce

Op op;
colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<grid, block, 0, stream>>>((PtrStepSz<typename TypeVec<T, cn>::vec_type>) src, (typename TypeVec<D, cn>::vec_type*) dst, op);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

}
@ -146,12 +146,12 @@ namespace cv { namespace gpu { namespace cudev
float minus_h2_inv = -1.f/(h * h * VecTraits<T>::cn);
float noise_mult = minus_h2_inv/(block_window * block_window);

cvCudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig (nlm_kernel<T, B<T> >, cudaFuncCachePreferL1) );
nlm_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, search_radius, block_radius, noise_mult);
cvCudaSafeCall ( cudaGetLastError () );
cudaSafeCall ( cudaGetLastError () );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template<typename T>
@ -505,9 +505,9 @@ namespace cv { namespace gpu { namespace cudev


fast_nlm_kernel<<<grid, block, smem>>>(fnlm, (PtrStepSz<T>)dst);
cvCudaSafeCall ( cudaGetLastError () );
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void nlm_fast_gpu<uchar>(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t);
@ -535,9 +535,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));

fnlm_split_kernel<<<g, b>>>(lab, l, ab);
cvCudaSafeCall ( cudaGetLastError () );
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void fnlm_merge_kernel(const PtrStepb l, const PtrStep<uchar2> ab, PtrStepSz<uchar3> lab)
@ -558,9 +558,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y));

fnlm_merge_kernel<<<g, b>>>(l, ab, lab);
cvCudaSafeCall ( cudaGetLastError () );
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
@ -159,10 +159,10 @@ namespace optflowbm

calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
maxX, maxY, acceptLevel, escapeLevel, ss, ssCount);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -402,10 +402,10 @@ namespace optflowbm_fast
size_t smem = search_window * search_window * sizeof(int);

optflowbm_fast_kernel<<<grid, block, smem, stream>>>(fbm, velx, vely);
cvCudaSafeCall ( cudaGetLastError () );
cudaSafeCall ( cudaGetLastError () );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void calc<uchar>(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream);
@ -119,9 +119,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(u_avg.cols, u_avg.rows);

NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
@ -210,9 +210,9 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));

NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
@ -123,13 +123,13 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
int polyN, const float *g, const float *xg, const float *xxg,
float ig11, float ig03, float ig33, float ig55)
{
cvCudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33)));
cvCudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55)));
cudaSafeCall(cudaMemcpyToSymbol(c_g, g, (polyN + 1) * sizeof(*g)));
cudaSafeCall(cudaMemcpyToSymbol(c_xg, xg, (polyN + 1) * sizeof(*xg)));
cudaSafeCall(cudaMemcpyToSymbol(c_xxg, xxg, (polyN + 1) * sizeof(*xxg)));
cudaSafeCall(cudaMemcpyToSymbol(c_ig11, &ig11, sizeof(ig11)));
cudaSafeCall(cudaMemcpyToSymbol(c_ig03, &ig03, sizeof(ig03)));
cudaSafeCall(cudaMemcpyToSymbol(c_ig33, &ig33, sizeof(ig33)));
cudaSafeCall(cudaMemcpyToSymbol(c_ig55, &ig55, sizeof(ig55)));
}
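A sketch of the constant-memory declarations these uploads pair with (the array sizes are an assumption; the callers below pass polyN of 5 or 7, so 8 entries per array suffice):

    __constant__ float c_g[8];     // Gaussian applicability
    __constant__ float c_xg[8];    // x-weighted applicability
    __constant__ float c_xxg[8];   // x^2-weighted applicability
    __constant__ float c_ig11, c_ig03, c_ig33, c_ig55;  // inverse Gram entries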
@ -144,10 +144,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
else if (polyN == 7)
polynomialExpansion<7><<<grid, block, smem, stream>>>(src.rows, src.cols, src, dst);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -244,7 +244,7 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
void setUpdateMatricesConsts()
{
static const float border[BORDER_SIZE + 1] = {0.14f, 0.14f, 0.4472f, 0.4472f, 0.4472f, 1.f};
cvCudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border)));
cudaSafeCall(cudaMemcpyToSymbol(c_border, border, (BORDER_SIZE + 1) * sizeof(*border)));
}
@ -257,10 +257,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback

updateMatrices<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, flowx, flowy, R0, R1, M);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -293,10 +293,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback

updateFlow<<<grid, block, 0, stream>>>(flowx.rows, flowx.cols, M, flowx, flowy);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -424,10 +424,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -443,10 +443,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
float boxAreaInv = 1.f / ((1 + 2*ksizeHalf) * (1 + 2*ksizeHalf));
boxFilter5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, boxAreaInv, dst);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}
@ -494,7 +494,7 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback

void setGaussianBlurKernel(const float *gKer, int ksizeHalf)
{
cvCudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer)));
cudaSafeCall(cudaMemcpyToSymbol(c_gKer, gKer, (ksizeHalf + 1) * sizeof(*gKer)));
}

@ -511,10 +511,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback

gaussianBlur<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -606,10 +606,10 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback

gaussianBlur5<<<grid, block, smem, stream>>>(height, width, src, ksizeHalf, b, dst);

cvCudaSafeCall(cudaGetLastError());
cudaSafeCall(cudaGetLastError());

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}
@ -132,10 +132,10 @@ namespace cv { namespace gpu { namespace cudev

HarrisResponses<<<grid, block, 0, stream>>>(img, loc, response, npoints, blockSize, harris_k);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -145,7 +145,7 @@ namespace cv { namespace gpu { namespace cudev

void loadUMax(const int* u_max, int count)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_u_max, u_max, count * sizeof(int)) );
}

__global__ void IC_Angle(const PtrStepb image, const short2* loc_, float* angle, const int npoints, const int half_k)
@ -214,10 +214,10 @@ namespace cv { namespace gpu { namespace cudev

IC_Angle<<<grid, block, 0, stream>>>(image, loc, angle, npoints, half_k);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -382,10 +382,10 @@ namespace cv { namespace gpu { namespace cudev
break;
}

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -413,10 +413,10 @@ namespace cv { namespace gpu { namespace cudev

mergeLocation<<<grid, block, 0, stream>>>(loc, x, y, npoints, scale);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
@ -181,10 +181,10 @@ namespace cv { namespace gpu { namespace cudev
B<T> b(src.rows, src.cols);

pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)

@ -150,10 +150,10 @@ namespace cv { namespace gpu { namespace cudev
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

pyrUp<<<grid, block, 0, stream>>>(src, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
@ -320,10 +320,10 @@ namespace pyrlk
else
sparseKernel<cn, PATCH_X, PATCH_Y, false><<<grid, block>>>(prevPts, nextPts, status, err, level, rows, cols);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <bool calcErr>
@ -474,14 +474,14 @@ namespace pyrlk

void loadConstants(int2 winSize, int iters)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );

int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cvCudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );

cvCudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
}

void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
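loadConstants above precomputes the half-window on the host so device code can read it as a plain global. A sketch of the matching declarations (an assumption; insideWindow is a hypothetical helper, not from the file):

    __constant__ int c_winSize_x, c_winSize_y;
    __constant__ int c_halfWin_x, c_halfWin_y;
    __constant__ int c_iters;

    __device__ bool insideWindow(int dx, int dy)
    {
        return ::abs(dx) <= c_halfWin_x && ::abs(dy) <= c_halfWin_y;
    }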
@ -544,16 +544,16 @@ namespace pyrlk
if (err.data)
{
denseKernel<true><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, err, I.rows, I.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
else
{
denseKernel<false><<<grid, block, smem_size, stream>>>(u, v, prevU, prevV, PtrStepf(), I.rows, I.cols);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
@ -81,7 +81,7 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

remap<<<grid, block, 0, stream>>>(filter_src, mapx, mapy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
};

@ -102,9 +102,9 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -135,8 +135,8 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
cvCudaSafeCall( cudaGetLastError() ); \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
@ -160,8 +160,8 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
} \
cvCudaSafeCall( cudaGetLastError() ); \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);

resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
};

@ -107,9 +107,9 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -123,9 +123,9 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -145,9 +145,9 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

resize<<<grid, block>>>(filteredSrc, fx, fy, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -185,8 +185,8 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cvCudaSafeCall( cudaGetLastError() ); \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
@ -140,9 +140,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));

Gray_to_YV12<<<grid, block>>>(src, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
@ -153,9 +153,9 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));

BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
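The halved grid in both launches reflects YV12 chroma subsampling: one U and one V sample per 2x2 block of luma pixels, so each thread covers a 2x2 tile. A sketch of the launch arithmetic (illustrative):

    dim3 block(32, 8);
    // Each thread handles a 2x2 pixel tile, hence the extra factor of 2.
    dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));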
@ -168,10 +168,10 @@ namespace row_filter
B<T> brd(src.cols);

linearRowFilter<KSIZE, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, brd);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -362,9 +362,9 @@ namespace filter
};

if (stream == 0)
cvCudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
cudaSafeCall( cudaMemcpyToSymbol(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cvCudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(row_filter::c_kernel, kernel, ksize * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );

callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
}
@ -236,10 +236,10 @@ namespace cv { namespace gpu { namespace cudev
src[0].data, src[0].step,
src[1].data, src[1].step,
dst.rows, dst.cols, dst.data, dst.step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -253,10 +253,10 @@ namespace cv { namespace gpu { namespace cudev
src[1].data, src[1].step,
src[2].data, src[2].step,
dst.rows, dst.cols, dst.data, dst.step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -271,10 +271,10 @@ namespace cv { namespace gpu { namespace cudev
src[2].data, src[2].step,
src[3].data, src[3].step,
dst.rows, dst.cols, dst.data, dst.step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}
@ -445,10 +445,10 @@ namespace cv { namespace gpu { namespace cudev
src.data, src.step, src.rows, src.cols,
dst[0].data, dst[0].step,
dst[1].data, dst[1].step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -462,10 +462,10 @@ namespace cv { namespace gpu { namespace cudev
dst[0].data, dst[0].step,
dst[1].data, dst[1].step,
dst[2].data, dst[2].step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}

@ -480,10 +480,10 @@ namespace cv { namespace gpu { namespace cudev
dst[1].data, dst[1].step,
dst[2].data, dst[2].step,
dst[3].data, dst[3].step);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall(cudaDeviceSynchronize());
cudaSafeCall(cudaDeviceSynchronize());
}
@ -322,10 +322,10 @@ namespace cv { namespace gpu { namespace cudev
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);

stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
};

typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
@ -353,15 +353,15 @@ namespace cv { namespace gpu { namespace cudev
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );

cvCudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cvCudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );

cvCudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
cvCudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cvCudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );

size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
cvCudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );

callers[winsz2](left, right, disp, maxdisp, stream);
}
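Note the width argument of cudaMemset2D is in bytes (hence cols * elemSize()), while the height is in rows; the 0xFF byte pattern initializes every unsigned minSSD entry to its maximum value. An illustrative call over a pitched buffer buf:

    // Fill a pitched rows x cols buffer of unsigned int with UINT_MAX.
    cudaSafeCall( cudaMemset2D(buf.data, buf.step, 0xFF,
                               buf.cols * buf.elemSize(), buf.rows) );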
@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace cudev
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cvCudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );

dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
@ -401,12 +401,12 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(input.rows, threads.y);

prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

cvCudaSafeCall( cudaUnbindTexture (texForSobel ) );
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}

@ -516,7 +516,7 @@ namespace cv { namespace gpu { namespace cudev
texForTF.addressMode[1] = cudaAddressModeWrap;

cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cvCudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
cudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );

dim3 threads(128, 1, 1);
dim3 grid(1, 1, 1);
@ -526,12 +526,12 @@ namespace cv { namespace gpu { namespace cudev

size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

cvCudaSafeCall( cudaUnbindTexture (texForTF) );
cudaSafeCall( cudaUnbindTexture (texForTF) );
}
} // namespace stereobm
}}} // namespace cv { namespace gpu { namespace cudev
@ -62,11 +62,11 @@ namespace cv { namespace gpu { namespace cudev

void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
{
cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
}

///////////////////////////////////////////////////////////////
@ -172,10 +172,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);

comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@ -186,10 +186,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);

comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <> void comp_data_gpu<uchar3, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
@ -201,10 +201,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);

comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@ -215,10 +215,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);

comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <> void comp_data_gpu<uchar4, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
@ -230,10 +230,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(left.rows, threads.y);

comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
@ -284,10 +284,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
grid.y = divUp(dst_rows, threads.y);
|
||||
|
||||
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||
@ -328,19 +328,19 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int src_idx = (dst_idx + 1) & 1;
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mus[src_idx], (PtrStepSz<T>)mus[dst_idx]);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mds[src_idx], (PtrStepSz<T>)mds[dst_idx]);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mls[src_idx], (PtrStepSz<T>)mls[dst_idx]);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mrs[src_idx], (PtrStepSz<T>)mrs[dst_idx]);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||
@ -459,10 +459,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
for(int t = 0; t < iters; ++t)
|
||||
{
|
||||
one_iteration<T><<<grid, threads, 0, stream>>>(t, elem_step, (T*)u.data, (T*)d.data, (T*)l.data, (T*)r.data, (PtrStepSz<T>)data, cols, rows);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
@ -524,10 +524,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
int elem_step = static_cast<int>(u.step/sizeof(T));
|
||||
|
||||
output<T><<<grid, threads, 0, stream>>>(elem_step, (const T*)u.data, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
|
||||
cvCudaSafeCall( cudaGetLastError() );
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cvCudaSafeCall( cudaDeviceSynchronize() );
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||
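The load_constants functions above rely on __constant__ memory: parameters that stay fixed across a batch of launches are uploaded once with cudaMemcpyToSymbol and read by kernels like ordinary globals. A sketch under assumed names (not the actual stereo-BP symbols):

#include <cuda_runtime.h>

__constant__ int   c_ndisp;          // illustrative symbols only
__constant__ float c_max_data_term;

__global__ void useConstants(float* out)
{
    // Constant memory is cached and broadcast-friendly: all threads
    // reading the same value pay for a single fetch.
    out[threadIdx.x] = c_ndisp * c_max_data_term;
}

void loadConstantsSketch(int ndisp, float max_data_term)
{
    // The real code wraps each call in cudaSafeCall.
    cudaMemcpyToSymbol(c_ndisp, &ndisp, sizeof(int));
    cudaMemcpyToSymbol(c_max_data_term, &max_data_term, sizeof(float));
}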
@ -78,20 +78,20 @@ namespace cv { namespace gpu { namespace cudev
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp)
{
cvCudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );

cvCudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
cvCudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
}

///////////////////////////////////////////////////////////////

@ -362,14 +362,14 @@ namespace cv { namespace gpu { namespace cudev
};

size_t disp_step = msg_step * h;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );

init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -382,10 +382,10 @@ namespace cv { namespace gpu { namespace cudev
else
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,

@ -546,15 +546,15 @@ namespace cv { namespace gpu { namespace cudev

size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );

callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step,

@ -662,9 +662,9 @@ namespace cv { namespace gpu { namespace cudev

size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );

dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -677,10 +677,10 @@ namespace cv { namespace gpu { namespace cudev
selected_disp_pyr_new, selected_disp_pyr_cur,
data_cost_selected, data_cost,
h, w, nr_plane, h2, w2, nr_plane2);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -767,8 +767,8 @@ namespace cv { namespace gpu { namespace cudev
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
{
size_t disp_step = msg_step * h;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );

dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -779,10 +779,10 @@ namespace cv { namespace gpu { namespace cudev
for(int t = 0; t < iters; ++t)
{
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
};

template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,

@ -837,8 +837,8 @@ namespace cv { namespace gpu { namespace cudev
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream)
{
size_t disp_step = disp.rows * msg_step;
cvCudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cvCudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );

dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);

@ -847,10 +847,10 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(disp.rows, threads.y);

compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
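One plausible reading of the disp_step arithmetic above: messages for all disparity planes share a single pitched allocation, so the offset between consecutive planes is the row pitch (msg_step) times the layer height. A hypothetical helper, assuming msg_step is measured in elements:

#include <cstddef>

// Illustrative only; not the actual indexing code from the diff.
inline const short* planePtr(const short* base, size_t msg_step, int h, int plane)
{
    const size_t disp_step = msg_step * static_cast<size_t>(h); // elements per plane
    return base + static_cast<size_t>(plane) * disp_step;
}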
@ -72,9 +72,9 @@ namespace tvl1flow
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

centeredGradientKernel<<<grid, block>>>(src, dx, dy);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -174,9 +174,9 @@ namespace tvl1flow
bindTexture(&tex_I1y, I1y);

warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -280,9 +280,9 @@ namespace tvl1flow
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));

estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -323,9 +323,9 @@ namespace tvl1flow
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));

estimateDualVariablesKernel<<<grid, block>>>(u1, u2, p11, p12, p21, p22, taut);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
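Every grid above is sized with divUp, a ceiling division that guarantees the grid covers the whole image even when its size is not a multiple of the block; the kernels then bounds-check per thread. A sketch with illustrative numbers:

#include <cuda_runtime.h>

static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }

void launchConfigFor(int cols, int rows, dim3& block, dim3& grid)
{
    block = dim3(32, 8);                 // 256 threads; x spans one warp
    grid  = dim3(divUp(cols, block.x),   // cols = 100 -> 4 blocks, not 3
                 divUp(rows, block.y));
}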
@ -102,22 +102,22 @@ namespace cv { namespace gpu { namespace cudev
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));

buildWarpMaps<Transform><<<grid, block, 0, stream>>>(xmap, ymap);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
}

void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

buildWarpMaps_caller<PerspectiveTransform>(xmap, ymap, stream);
}

@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

warp<Transform><<<grid, block, 0, stream>>>(filter_src, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
};

@ -174,9 +174,9 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< PtrStep<T>, B<work_type> > > filter_src(brdSrc);

warp<Transform><<<grid, block>>>(filter_src, dst);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -206,8 +206,8 @@ namespace cv { namespace gpu { namespace cudev
BorderReader< tex_warp_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
Filter< BorderReader< tex_warp_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
cvCudaSafeCall( cudaGetLastError() ); \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \

@ -230,8 +230,8 @@ namespace cv { namespace gpu { namespace cudev
Filter< BorderReader< tex_warp_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
warp<Transform><<<grid, block>>>(filter_src, dst); \
} \
cvCudaSafeCall( cudaGetLastError() ); \
cvCudaSafeCall( cudaDeviceSynchronize() ); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};

@ -310,7 +310,7 @@ namespace cv { namespace gpu { namespace cudev
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );

warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
}

@ -348,7 +348,7 @@ namespace cv { namespace gpu { namespace cudev
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
int borderMode, const float* borderValue, cudaStream_t stream, bool cc20)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );

warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc20);
}
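The c_warpMat uploads above put the 2x3 (affine) or 3x3 (perspective) matrix into constant memory so every thread can cheaply map its destination pixel back to source coordinates. A simplified, hypothetical device-side view of the affine case:

#include <cuda_runtime.h>

__constant__ float c_mat[2 * 3];   // illustrative; the diff calls it c_warpMat

__device__ float2 affineSourceOf(int x, int y)
{
    // Row-major 2x3 matrix applied to the destination coordinate.
    return make_float2(c_mat[0] * x + c_mat[1] * y + c_mat[2],
                       c_mat[3] * x + c_mat[4] * y + c_mat[5]);
}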
@ -150,7 +150,7 @@ namespace
nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 1>::func_ptr func> struct NppArithmScalar<DEPTH, 1, func>

@ -168,7 +168,7 @@ namespace
nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 2>::func_ptr func> struct NppArithmScalar<DEPTH, 2, func>

@ -192,7 +192,7 @@ namespace
(npp_complex_type*)dst.data, static_cast<int>(dst.step), sz, 0) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int cn, typename NppArithmScalarFunc<CV_32F, cn>::func_ptr func> struct NppArithmScalar<CV_32F, cn, func>

@ -212,7 +212,7 @@ namespace
nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<typename NppArithmScalarFunc<CV_32F, 1>::func_ptr func> struct NppArithmScalar<CV_32F, 1, func>

@ -230,7 +230,7 @@ namespace
nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<typename NppArithmScalarFunc<CV_32F, 2>::func_ptr func> struct NppArithmScalar<CV_32F, 2, func>

@ -253,7 +253,7 @@ namespace
nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant, (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -2218,7 +2218,7 @@ namespace
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <int DEPTH, typename NppBitwiseCFunc<DEPTH, 1>::func_t func> struct NppBitwiseC<DEPTH, 1, func>

@ -2236,7 +2236,7 @@ namespace
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -2349,7 +2349,7 @@ namespace
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <int DEPTH, typename NppShiftFunc<DEPTH, 1>::func_t func> struct NppShift<DEPTH, 1, func>

@ -2367,7 +2367,7 @@ namespace
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -2708,7 +2708,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, static_cast<Npp32f>(thresh), NPP_CMP_GREATER) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{

@ -2805,7 +2805,7 @@ namespace
dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
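nppSafeCall exists alongside cudaSafeCall because NPP primitives report an NppStatus rather than a cudaError_t, with negative values denoting failure; the trailing "if (stream == 0) cudaDeviceSynchronize()" keeps the blocking API synchronous on the default stream. A hedged sketch of such a status checker (illustrative, not OpenCV's implementation, with a stand-in typedef so it compiles without the NPP headers):

#include <cstdio>

typedef int NppStatusLike;   // stand-in for NppStatus; negative means error

inline void checkNppStatus(NppStatusLike status, const char* file, int line)
{
    if (status < 0)
        fprintf(stderr, "NPP error %d (%s:%d)\n", status, file, line);
}

#define NPP_SAFE_CALL(expr) checkNppStatus((expr), __FILE__, __LINE__)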
@ -288,7 +288,7 @@ namespace
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -322,7 +322,7 @@ namespace
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, ksize, anchor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -368,7 +368,7 @@ namespace
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

nppFilterBox_t func;

@ -436,7 +436,7 @@ namespace
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, kernel.ptr<Npp8u>(), oKernelSize, oAnchor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

GpuMat kernel;

@ -702,7 +702,7 @@ namespace
kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

GpuMat kernel;

@ -865,7 +865,7 @@ namespace
kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

GpuMat kernel;

@ -967,7 +967,7 @@ namespace
kernel.ptr<Npp32s>(), ksize, anchor, nDivisor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

GpuMat kernel;

@ -1307,7 +1307,7 @@ namespace
nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, oKernelSize, oAnchor) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

nppFilterRank_t func;
@ -198,7 +198,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
#endif

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight,

@ -276,7 +276,7 @@ void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTrans
#endif

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

#endif /* !defined (HAVE_CUDA) */
@ -278,7 +278,7 @@ void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf&
ushort2* oldBuf = oldBuf_;
ushort2* newBuf = newBuf_;

cvCudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );

const int cellSize = cvRound(minDist);
const int gridWidth = (src.cols + cellSize - 1) / cellSize;

@ -338,7 +338,7 @@ void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf&
}
}

cvCudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
centersCount = newCount;
}

@ -603,8 +603,8 @@ namespace
oldPosBuf.resize(posCount);
oldVoteBuf.resize(posCount);

cvCudaSafeCall( cudaMemcpy(&oldPosBuf[0], outBuf.ptr(0), posCount * sizeof(float4), cudaMemcpyDeviceToHost) );
cvCudaSafeCall( cudaMemcpy(&oldVoteBuf[0], outBuf.ptr(1), posCount * sizeof(int3), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&oldPosBuf[0], outBuf.ptr(0), posCount * sizeof(float4), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&oldVoteBuf[0], outBuf.ptr(1), posCount * sizeof(int3), cudaMemcpyDeviceToHost) );

indexies.resize(posCount);
for (int i = 0; i < posCount; ++i)

@ -677,8 +677,8 @@ namespace
}

posCount = static_cast<int>(newPosBuf.size());
cvCudaSafeCall( cudaMemcpy(outBuf.ptr(0), &newPosBuf[0], posCount * sizeof(float4), cudaMemcpyHostToDevice) );
cvCudaSafeCall( cudaMemcpy(outBuf.ptr(1), &newVoteBuf[0], posCount * sizeof(int3), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(outBuf.ptr(0), &newPosBuf[0], posCount * sizeof(float4), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(outBuf.ptr(1), &newVoteBuf[0], posCount * sizeof(int3), cudaMemcpyHostToDevice) );
}

void GHT_Pos::convertTo(GpuMat& positions)

@ -1153,7 +1153,7 @@ namespace
true, templCenter);

h_buf.resize(templFeatures.sizes.cols);
cvCudaSafeCall( cudaMemcpy(&h_buf[0], templFeatures.sizes.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&h_buf[0], templFeatures.sizes.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
templFeatures.maxSize = *max_element(h_buf.begin(), h_buf.end());
}

@ -1279,7 +1279,7 @@ namespace
hist.setTo(Scalar::all(0));
GHT_Guil_Full_calcOHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
hist.ptr<int>(), (float)minAngle, (float)maxAngle, (float)angleStep, angleRange, levels, templFeatures.maxSize);
cvCudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );

angles.clear();

@ -1303,7 +1303,7 @@ namespace
hist.setTo(Scalar::all(0));
GHT_Guil_Full_calcSHist_gpu(templFeatures.sizes.ptr<int>(), imageFeatures.sizes.ptr<int>(0),
hist.ptr<int>(), (float)angle, (float)angleEpsilon, (float)minScale, (float)maxScale, (float)iScaleStep, scaleRange, levels, templFeatures.maxSize);
cvCudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&h_buf[0], hist.data, h_buf.size() * sizeof(int), cudaMemcpyDeviceToHost) );

scales.clear();
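The memcpy pairs above implement a device-host round trip: candidates are downloaded, filtered on the CPU (grid-based non-maximum suppression in the surrounding code), and the survivors are written back over the same device buffer. A generic sketch with assumed names and element type:

#include <vector>
#include <cuda_runtime.h>

// Returns the new candidate count; the real code also tracks buffer capacity.
int filterOnHost(unsigned int* dbuf, std::vector<unsigned int>& host, int count)
{
    host.resize(count);   // assumed count > 0 so &host[0] is valid
    cudaMemcpy(&host[0], dbuf, count * sizeof(unsigned int), cudaMemcpyDeviceToHost);

    // ... host-side filtering would shrink `host` here ...

    cudaMemcpy(dbuf, &host[0], host.size() * sizeof(unsigned int), cudaMemcpyHostToDevice);
    return static_cast<int>(host.size());
}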
@ -320,7 +320,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
}

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{

@ -494,7 +494,7 @@ namespace
dst.ptr<npp_t>(), static_cast<int>(dst.step), dstroi, angle, xShift, yShift, npp_inter[interpolation]) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}

@ -581,7 +581,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
roiSize.height = src.rows;

cudaDeviceProp prop;
cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );

Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );

@ -594,7 +594,7 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
sum.ptr<Ncv32u>(), static_cast<int>(sum.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}

@ -610,7 +610,7 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
roiSize.height = src.rows;

cudaDeviceProp prop;
cvCudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );

Ncv32u bufSize;
ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));

@ -625,7 +625,7 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
sqsum.ptr<Ncv64u>(0), static_cast<int>(sqsum.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

//////////////////////////////////////////////////////////////////////////////

@ -674,7 +674,7 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz, nppRect) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

@ -726,7 +726,7 @@ namespace
lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>

@ -758,7 +758,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buffer.ptr<Npp8u>()) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};

@ -826,7 +826,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buffer.ptr<Npp8u>()) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>

@ -866,7 +866,7 @@ namespace
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buffer.ptr<Npp8u>()) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
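The cudaGetDeviceProperties calls above feed the NCV scratch-buffer sizing functions, which need per-device information. A small sketch of querying the current device:

#include <cuda_runtime.h>

cudaDeviceProp currentDeviceProps()
{
    int device = 0;
    cudaGetDevice(&device);                 // the diff uses cv::gpu::getDevice()

    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, device); // wrapped in cudaSafeCall in the diff
    return prop;
}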
@ -80,11 +80,11 @@ namespace
public:
explicit DeviceBuffer(int count_ = 1) : count(count_)
{
cvCudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
}
~DeviceBuffer()
{
cvCudaSafeCall( cudaFree(pdev) );
cudaSafeCall( cudaFree(pdev) );
}

operator double*() {return pdev;}

@ -92,13 +92,13 @@ namespace
void download(double* hptr)
{
double hbuf;
cvCudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
*hptr = hbuf;
}
void download(double** hptrs)
{
AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
cvCudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
for (int i = 0; i < count; ++i)
*hptrs[i] = hbuf[i];
}

@ -143,7 +143,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat

nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

double* ptrs[2] = {mean.val, stddev.val};
dbuf.download(ptrs);

@ -205,7 +205,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)

nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

dbuf.download(&retVal);
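DeviceBuffer above is a small RAII owner for device-side doubles, letting NPP reductions write their results on the GPU before the host downloads them. A self-contained, simplified sketch (the real class also converts implicitly to double*):

#include <cuda_runtime.h>

class DeviceDoubles
{
public:
    explicit DeviceDoubles(int count) : count_(count), pdev_(0)
    {
        cudaMalloc(&pdev_, count_ * sizeof(double));  // wrapped in cudaSafeCall in the diff
    }
    ~DeviceDoubles() { cudaFree(pdev_); }

    double* ptr() { return pdev_; }

    void download(double* host) const                 // blocking device-to-host copy
    {
        cudaMemcpy(host, pdev_, count_ * sizeof(double), cudaMemcpyDeviceToHost);
    }

private:
    DeviceDoubles(const DeviceDoubles&);              // raw owner: non-copyable
    DeviceDoubles& operator=(const DeviceDoubles&);

    int count_;
    double* pdev_;
};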
@ -216,10 +216,10 @@ namespace cv { namespace gpu { namespace cudev
kernelDownsampleX2<<<gDim, bDim, 0, stream>>>((T*)src.data, static_cast<Ncv32u>(src.step),
(T*)dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);

@ -291,10 +291,10 @@ namespace cv { namespace gpu { namespace cudev
kernelInterpolateFrom1<<<gDim, bDim, 0, stream>>>((T*) src.data, static_cast<Ncv32u>(src.step), NcvSize32u(src.cols, src.rows),
(T*) dst.data, static_cast<Ncv32u>(dst.step), NcvSize32u(dst.cols, dst.rows));

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
@ -184,9 +184,9 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo

ensureSizeIsEnough(1, ssCount, CV_16SC2, buf);
if (stream == 0)
cvCudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice) );
else
cvCudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyAsync(buf.data, &ss[0], ssCount * sizeof(short2), cudaMemcpyHostToDevice, stream) );

const int maxX = prev.cols - blockSize.width;
const int maxY = prev.rows - blockSize.height;
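The branch above picks the copy flavour by stream: a blocking cudaMemcpy on the default stream, cudaMemcpyAsync on a user stream so the upload is ordered with that stream's other work (pinned host memory is assumed for a truly asynchronous copy). Generalized:

#include <cuda_runtime.h>

void uploadOnStream(void* dptr, const void* hptr, size_t bytes, cudaStream_t stream)
{
    if (stream == 0)
        cudaMemcpy(dptr, hptr, bytes, cudaMemcpyHostToDevice);       // blocks
    else
        cudaMemcpyAsync(dptr, hptr, bytes, cudaMemcpyHostToDevice, stream);
}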
@ -82,7 +82,7 @@ void cv::gpu::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& f
v.create(frame0.size(), CV_32FC1);

cudaDeviceProp devProp;
cvCudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );

NCVBroxOpticalFlowDescriptor desc;

@ -185,7 +185,7 @@ void cv::gpu::interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, cons
ncvSafeCall( nppiStInterpolateFrames(&state) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

namespace cv { namespace gpu { namespace cudev

@ -133,7 +133,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{

@ -170,7 +170,7 @@ namespace
coeffs, npp_inter[interpolation]) );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
@ -106,19 +106,19 @@ namespace cv { namespace gpu { namespace cudev

void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
cudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
cudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
cudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
cudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
cudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
cudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
}

void loadOctaveConstants(int octave, int layer_rows, int layer_cols)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
cudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
cudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
cudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
}

////////////////////////////////////////////////////////////////////////

@ -137,14 +137,14 @@ namespace cv { namespace gpu { namespace cudev
{
size_t offset;
cudaChannelFormatDesc desc_sum = cudaCreateChannelDesc<uint>();
cvCudaSafeCall( cudaBindTexture2D(&offset, sumTex, sum.data, desc_sum, sum.cols, sum.rows, sum.step));
cudaSafeCall( cudaBindTexture2D(&offset, sumTex, sum.data, desc_sum, sum.cols, sum.rows, sum.step));
return offset / sizeof(uint);
}
size_t bindMaskSumTex(PtrStepSz<uint> maskSum)
{
size_t offset;
cudaChannelFormatDesc desc_sum = cudaCreateChannelDesc<uint>();
cvCudaSafeCall( cudaBindTexture2D(&offset, maskSumTex, maskSum.data, desc_sum, maskSum.cols, maskSum.rows, maskSum.step));
cudaSafeCall( cudaBindTexture2D(&offset, maskSumTex, maskSum.data, desc_sum, maskSum.cols, maskSum.rows, maskSum.step));
return offset / sizeof(uint);
}

@ -245,9 +245,9 @@ namespace cv { namespace gpu { namespace cudev
grid.y = divUp(max_samples_i, threads.y) * (nOctaveLayers + 2);

icvCalcLayerDetAndTrace<<<grid, threads>>>(det, trace);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////

@ -395,9 +395,9 @@ namespace cv { namespace gpu { namespace cudev
else
icvFindMaximaInLayer<WithOutMask><<<grid, threads, smem_size>>>(det, trace, maxPosBuffer, maxCounter);

cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////

@ -520,9 +520,9 @@ namespace cv { namespace gpu { namespace cudev
grid.x = maxCounter;

icvInterpolateKeypoint<<<grid, threads>>>(det, maxPosBuffer, featureX, featureY, featureLaplacian, featureOctave, featureSize, featureHessian, featureCounter);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////

@ -686,9 +686,9 @@ namespace cv { namespace gpu { namespace cudev
grid.x = nFeatures;

icvCalcOrientation<<<grid, threads>>>(featureX, featureY, featureSize, featureDir);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

////////////////////////////////////////////////////////////////////////

@ -932,26 +932,26 @@ namespace cv { namespace gpu { namespace cudev
if (descriptors.cols == 64)
{
compute_descriptors_64<<<nFeatures, dim3(32, 16)>>>(descriptors, featureX, featureY, featureSize, featureDir);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

normalize_descriptors<64><<<nFeatures, 64>>>((PtrStepSzf) descriptors);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{
compute_descriptors_128<<<nFeatures, dim3(32, 16)>>>(descriptors, featureX, featureY, featureSize, featureDir);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );

normalize_descriptors<128><<<nFeatures, 128>>>((PtrStepSzf) descriptors);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
} // namespace surf
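bindSumTex and bindMaskSumTex above use the legacy texture-reference API (deprecated and later removed in modern CUDA): a pitched 2D image is bound to a global texture reference, and the returned byte offset is converted to elements because the driver may not honour an arbitrary base pointer. A sketch with an assumed texture name, compilable only against pre-CUDA-12 toolkits:

#include <cuda_runtime.h>

texture<unsigned int, cudaTextureType2D, cudaReadModeElementType> texSumSketch;

size_t bindSumSketch(const unsigned int* data, int cols, int rows, size_t stepBytes)
{
    size_t offset = 0;
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned int>();
    cudaBindTexture2D(&offset, texSumSketch, data, desc, cols, rows, stepBytes);
    return offset / sizeof(unsigned int);   // the offset comes back in bytes
}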
@ -69,10 +69,10 @@ namespace cv { namespace gpu { namespace cudev

void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
cvCudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
}

__device__ __forceinline__ uint nextRand(uint& state)

@ -137,13 +137,13 @@ namespace cv { namespace gpu { namespace cudev
dim3 block(32, 8);
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );

init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)

@ -246,13 +246,13 @@ namespace cv { namespace gpu { namespace cudev
dim3 block(32, 8);
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

cvCudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );

update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
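The cudaFuncSetCacheConfig calls above hint that these kernels use little shared memory, so the split L1/shared hardware of that era could favour L1. A tiny stand-alone sketch:

#include <cuda_runtime.h>

__global__ void touch(float* p) { p[threadIdx.x] += 1.0f; }

void launchWithL1Preference(float* p)
{
    cudaFuncSetCacheConfig(touch, cudaFuncCachePreferL1);  // per-kernel hint
    touch<<<1, 32>>>(p);
    cudaGetLastError();          // wrapped in cudaSafeCall in the diff
    cudaDeviceSynchronize();
}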
@ -177,7 +177,7 @@ namespace
img_rows, img_cols, octave, use_mask, surf_.nOctaveLayers);

unsigned int maxCounter;
cvCudaSafeCall( cudaMemcpy(&maxCounter, counters.ptr<unsigned int>() + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&maxCounter, counters.ptr<unsigned int>() + 1 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));

if (maxCounter > 0)

@ -190,7 +190,7 @@ namespace
}
}
unsigned int featureCounter;
cvCudaSafeCall( cudaMemcpy(&featureCounter, counters.ptr<unsigned int>(), sizeof(unsigned int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&featureCounter, counters.ptr<unsigned int>(), sizeof(unsigned int), cudaMemcpyDeviceToHost) );
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));

keypoints.cols = featureCounter;
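The counter downloads above follow a common append-buffer pattern: kernels atomically bump a device counter while writing candidates, the host copies the counter back, and clamps it because the kernel may have tried to append more results than the buffer holds. A sketch with illustrative names:

#include <algorithm>
#include <cuda_runtime.h>

unsigned int readClampedCounter(const unsigned int* dCounter, unsigned int capacity)
{
    unsigned int count = 0;
    cudaMemcpy(&count, dCounter, sizeof(unsigned int), cudaMemcpyDeviceToHost);
    return std::min(count, capacity);   // the kernel may overshoot the buffer
}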
@ -340,7 +340,7 @@ struct cv::softcascade::SCascade::Fields
else
cudaMemset(objects.data, 0, sizeof(Detection));

cvCudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaGetLastError());

cudev::CascadeInvoker<cudev::GK107PolicyX4> invoker
= cudev::CascadeInvoker<cudev::GK107PolicyX4>(levels, stages, nodes, leaves);

@ -102,9 +102,9 @@ namespace btv_l1_cudev
backwardMotionX, bacwardMotionY,
forwardMapX, forwardMapY,
backwardMapX, backwardMapY);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template <typename T>

@ -128,10 +128,10 @@ namespace btv_l1_cudev
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

upscaleKernel<src_t><<<grid, block, 0, stream>>>((PtrStepSz<src_t>) src, (PtrStepSz<src_t>) dst, scale);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

if (stream == 0)
cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void upscale<1>(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);

@ -211,7 +211,7 @@ namespace btv_l1_cudev

void loadBtvWeights(const float* weights, size_t count)
{
cvCudaSafeCall( cudaMemcpyToSymbol(c_btvRegWeights, weights, count * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(c_btvRegWeights, weights, count * sizeof(float)) );
}

template <int cn>

@ -223,9 +223,9 @@ namespace btv_l1_cudev
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));

calcBtvRegularizationKernel<src_t><<<grid, block>>>((PtrStepSz<src_t>) src, (PtrStepSz<src_t>) dst, ksize);
cvCudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );

cvCudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}

template void calcBtvRegularization<1>(PtrStepSzb src, PtrStepSzb dst, int ksize);