mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 19:20:28 +08:00
Removed PtrElemStep; marked DevMem2D as deprecated, use PtrStepSz instead
This commit is contained in:
parent
9be63c50dd
commit
70204a8e68
@ -1,161 +1,160 @@
|
|||||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
//
|
//
|
||||||
// By downloading, copying, installing or using the software you agree to this license.
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
// If you do not agree to this license, do not download, install,
|
// If you do not agree to this license, do not download, install,
|
||||||
// copy or use the software.
|
// copy or use the software.
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// License Agreement
|
// License Agreement
|
||||||
// For Open Source Computer Vision Library
|
// For Open Source Computer Vision Library
|
||||||
//
|
//
|
||||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
//
|
//
|
||||||
// * Redistribution's of source code must retain the above copyright notice,
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
// this list of conditions and the following disclaimer.
|
// this list of conditions and the following disclaimer.
|
||||||
//
|
//
|
||||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
// and/or other materials provided with the distribution.
|
// and/or other materials provided with the distribution.
|
||||||
//
|
//
|
||||||
// * The name of the copyright holders may not be used to endorse or promote products
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
// derived from this software without specific prior written permission.
|
// derived from this software without specific prior written permission.
|
||||||
//
|
//
|
||||||
// This software is provided by the copyright holders and contributors "as is" and
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
// any express or implied warranties, including, but not limited to, the implied
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
// indirect, incidental, special, exemplary, or consequential damages
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
// (including, but not limited to, procurement of substitute goods or services;
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
// loss of use, data, or profits; or business interruption) however caused
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
// and on any theory of liability, whether in contract, strict liability,
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#ifndef __OPENCV_CORE_DevMem2D_HPP__
|
#ifndef __OPENCV_CORE_DEVPTRS_HPP__
|
||||||
#define __OPENCV_CORE_DevMem2D_HPP__
|
#define __OPENCV_CORE_DEVPTRS_HPP__
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
||||||
#ifdef __CUDACC__
|
#ifdef __CUDACC__
|
||||||
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
|
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
|
||||||
#else
|
#else
|
||||||
#define __CV_GPU_HOST_DEVICE__
|
#define __CV_GPU_HOST_DEVICE__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
namespace gpu
|
namespace gpu
|
||||||
{
|
{
|
||||||
// Simple lightweight structures that encapsulate information about an image on device.
|
// Simple lightweight structures that encapsulate information about an image on device.
|
||||||
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
// They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
||||||
|
|
||||||
template <bool expr> struct StaticAssert;
|
template <bool expr> struct StaticAssert;
|
||||||
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
|
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
|
||||||
|
|
||||||
template<typename T> struct DevPtr
|
template<typename T> struct DevPtr
|
||||||
{
|
{
|
||||||
typedef T elem_type;
|
typedef T elem_type;
|
||||||
typedef int index_type;
|
typedef int index_type;
|
||||||
|
|
||||||
enum { elem_size = sizeof(elem_type) };
|
enum { elem_size = sizeof(elem_type) };
|
||||||
|
|
||||||
T* data;
|
T* data;
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
|
__CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
|
||||||
__CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
|
__CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
||||||
__CV_GPU_HOST_DEVICE__ operator T*() { return data; }
|
__CV_GPU_HOST_DEVICE__ operator T*() { return data; }
|
||||||
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
|
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T> struct PtrSz : public DevPtr<T>
|
template<typename T> struct PtrSz : public DevPtr<T>
|
||||||
{
|
{
|
||||||
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
|
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
|
||||||
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
|
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
|
||||||
|
|
||||||
size_t size;
|
size_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T> struct PtrStep : public DevPtr<T>
|
template<typename T> struct PtrStep : public DevPtr<T>
|
||||||
{
|
{
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
|
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
|
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
|
||||||
|
|
||||||
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
|
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
|
||||||
size_t step;
|
size_t step;
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
|
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
|
||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
|
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
||||||
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> struct PtrStepSz : public PtrStep<T>
|
template <typename T> struct PtrStepSz : public PtrStep<T>
|
||||||
{
|
{
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
|
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
|
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
|
||||||
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
|
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
|
||||||
|
|
||||||
int cols;
|
template <typename U>
|
||||||
int rows;
|
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
|
||||||
};
|
|
||||||
|
int cols;
|
||||||
template <typename T> struct DevMem2D_ : public PtrStepSz<T>
|
int rows;
|
||||||
{
|
};
|
||||||
DevMem2D_() {}
|
|
||||||
DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
|
typedef PtrStepSz<unsigned char> PtrStepSzb;
|
||||||
|
typedef PtrStepSz<float> PtrStepSzf;
|
||||||
template <typename U>
|
typedef PtrStepSz<int> PtrStepSzi;
|
||||||
explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
|
|
||||||
};
|
typedef PtrStep<unsigned char> PtrStepb;
|
||||||
|
typedef PtrStep<float> PtrStepf;
|
||||||
template<typename T> struct PtrElemStep_ : public PtrStep<T>
|
typedef PtrStep<int> PtrStepi;
|
||||||
{
|
|
||||||
PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
|
|
||||||
{
|
#if defined __GNUC__
|
||||||
StaticAssert<256 % sizeof(T) == 0>::check();
|
#define __CV_GPU_DEPR_BEFORE__
|
||||||
|
#define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
|
||||||
PtrStep<T>::step /= PtrStep<T>::elem_size;
|
#elif defined(__MSVC__) //|| defined(__CUDACC__)
|
||||||
}
|
#pragma deprecated(DevMem2D_)
|
||||||
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
|
#define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
|
||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
|
#define __CV_GPU_DEPR_AFTER__
|
||||||
|
#else
|
||||||
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
#define __CV_GPU_DEPR_BEFORE__
|
||||||
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
#define __CV_GPU_DEPR_AFTER__
|
||||||
};
|
#endif
|
||||||
|
|
||||||
template<typename T> struct PtrStep_ : public PtrStep<T>
|
template <typename T> struct __CV_GPU_DEPR_BEFORE__ DevMem2D_ : public PtrStepSz<T>
|
||||||
{
|
{
|
||||||
PtrStep_() {}
|
DevMem2D_() {}
|
||||||
PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
|
DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
|
||||||
};
|
|
||||||
|
template <typename U>
|
||||||
typedef DevMem2D_<unsigned char> DevMem2Db;
|
explicit __CV_GPU_DEPR_BEFORE__ DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
|
||||||
typedef DevMem2Db DevMem2D;
|
} __CV_GPU_DEPR_AFTER__ ;
|
||||||
typedef DevMem2D_<float> DevMem2Df;
|
|
||||||
typedef DevMem2D_<int> DevMem2Di;
|
typedef DevMem2D_<unsigned char> DevMem2Db;
|
||||||
|
typedef DevMem2Db DevMem2D;
|
||||||
typedef PtrStep<unsigned char> PtrStepb;
|
typedef DevMem2D_<float> DevMem2Df;
|
||||||
typedef PtrStep<float> PtrStepf;
|
typedef DevMem2D_<int> DevMem2Di;
|
||||||
typedef PtrStep<int> PtrStepi;
|
|
||||||
|
//#undef __CV_GPU_DEPR_BEFORE__
|
||||||
typedef PtrElemStep_<unsigned char> PtrElemStep;
|
//#undef __CV_GPU_DEPR_AFTER__
|
||||||
typedef PtrElemStep_<float> PtrElemStepf;
|
|
||||||
typedef PtrElemStep_<int> PtrElemStepi;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
#endif // __cplusplus
|
||||||
#endif // __cplusplus
|
|
||||||
|
#endif /* __OPENCV_CORE_DEVPTRS_HPP__ */
|
||||||
#endif /* __OPENCV_GPU_DevMem2D_HPP__ */
|
|
@ -46,7 +46,7 @@
|
|||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
||||||
#include "opencv2/core/core.hpp"
|
#include "opencv2/core/core.hpp"
|
||||||
#include "opencv2/core/devmem2d.hpp"
|
#include "opencv2/core/cuda_devptrs.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
namespace cv { namespace gpu
|
||||||
{
|
{
|
||||||
@ -268,10 +268,14 @@ namespace cv { namespace gpu
|
|||||||
template<typename _Tp> _Tp* ptr(int y = 0);
|
template<typename _Tp> _Tp* ptr(int y = 0);
|
||||||
template<typename _Tp> const _Tp* ptr(int y = 0) const;
|
template<typename _Tp> const _Tp* ptr(int y = 0) const;
|
||||||
|
|
||||||
template <typename _Tp> operator DevMem2D_<_Tp>() const;
|
template <typename _Tp> operator PtrStepSz<_Tp>() const;
|
||||||
template <typename _Tp> operator PtrStep_<_Tp>() const;
|
|
||||||
template <typename _Tp> operator PtrStep<_Tp>() const;
|
template <typename _Tp> operator PtrStep<_Tp>() const;
|
||||||
|
|
||||||
|
// Deprecated function
|
||||||
|
__CV_GPU_DEPR_BEFORE__ template <typename _Tp> operator DevMem2D_<_Tp>() const __CV_GPU_DEPR_AFTER__;
|
||||||
|
#undef __CV_GPU_DEPR_BEFORE__
|
||||||
|
#undef __CV_GPU_DEPR_AFTER__
|
||||||
|
|
||||||
/*! includes several bit-fields:
|
/*! includes several bit-fields:
|
||||||
- the magic signature
|
- the magic signature
|
||||||
- continuity flag
|
- continuity flag
|
||||||
@ -502,14 +506,9 @@ namespace cv { namespace gpu
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> inline GpuMat::operator DevMem2D_<T>() const
|
template <class T> inline GpuMat::operator PtrStepSz<T>() const
|
||||||
{
|
{
|
||||||
return DevMem2D_<T>(rows, cols, (T*)data, step);
|
return PtrStepSz<T>(rows, cols, (T*)data, step);
|
||||||
}
|
|
||||||
|
|
||||||
template <class T> inline GpuMat::operator PtrStep_<T>() const
|
|
||||||
{
|
|
||||||
return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> inline GpuMat::operator PtrStep<T>() const
|
template <class T> inline GpuMat::operator PtrStep<T>() const
|
||||||
@ -517,6 +516,11 @@ namespace cv { namespace gpu
|
|||||||
return PtrStep<T>((T*)data, step);
|
return PtrStep<T>((T*)data, step);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T> inline GpuMat::operator DevMem2D_<T>() const
|
||||||
|
{
|
||||||
|
return DevMem2D_<T>(rows, cols, (T*)data, step);
|
||||||
|
}
|
||||||
|
|
||||||
inline GpuMat createContinuous(int rows, int cols, int type)
|
inline GpuMat createContinuous(int rows, int cols, int type)
|
||||||
{
|
{
|
||||||
GpuMat m;
|
GpuMat m;
|
||||||
|
@ -44,6 +44,18 @@
|
|||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "opencv2/gpu/device/functional.hpp"
|
#include "opencv2/gpu/device/functional.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace gpu { namespace device
|
||||||
|
{
|
||||||
|
void writeScalar(const uchar*);
|
||||||
|
void writeScalar(const schar*);
|
||||||
|
void writeScalar(const ushort*);
|
||||||
|
void writeScalar(const short int*);
|
||||||
|
void writeScalar(const int*);
|
||||||
|
void writeScalar(const float*);
|
||||||
|
void writeScalar(const double*);
|
||||||
|
void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
|
||||||
|
}}}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T> struct shift_and_sizeof;
|
template <typename T> struct shift_and_sizeof;
|
||||||
@ -59,17 +71,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
////////////////////////////////// CopyTo /////////////////////////////////
|
////////////////////////////////// CopyTo /////////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
|
template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (colorMask)
|
if (colorMask)
|
||||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMask(mask), stream);
|
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
|
||||||
else
|
else
|
||||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
|
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream)
|
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
|
||||||
|
|
||||||
static func_t tab[] =
|
static func_t tab[] =
|
||||||
{
|
{
|
||||||
@ -164,7 +176,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream)
|
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
writeScalar(scalar);
|
writeScalar(scalar);
|
||||||
|
|
||||||
@ -178,16 +190,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall ( cudaDeviceSynchronize() );
|
cudaSafeCall ( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream)
|
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
writeScalar(scalar);
|
writeScalar(scalar);
|
||||||
|
|
||||||
@ -201,13 +213,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall ( cudaDeviceSynchronize() );
|
cudaSafeCall ( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void set_to_gpu<uchar >(DevMem2Db mat, const uchar* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<schar >(DevMem2Db mat, const schar* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<ushort>(DevMem2Db mat, const ushort* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<short >(DevMem2Db mat, const short* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<int >(DevMem2Db mat, const int* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<float >(DevMem2Db mat, const float* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
|
||||||
template void set_to_gpu<double>(DevMem2Db mat, const double* scalar, int channels, cudaStream_t stream);
|
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////// ConvertTo ////////////////////////////////
|
//////////////////////////////// ConvertTo ////////////////////////////////
|
||||||
@ -274,12 +286,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, typename D>
|
template<typename T, typename D>
|
||||||
void cvt_(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream)
|
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
|
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
|
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
|
||||||
Convertor<T, D> op(alpha, beta);
|
Convertor<T, D> op(alpha, beta);
|
||||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
cv::gpu::device::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined __clang__
|
#if defined __clang__
|
||||||
@ -287,9 +299,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
# pragma clang diagnostic ignored "-Wmissing-declarations"
|
# pragma clang diagnostic ignored "-Wmissing-declarations"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
|
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2Db src, DevMem2Db dst, double alpha, double beta, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t tab[8][8] =
|
static const caller_t tab[8][8] =
|
||||||
{
|
{
|
||||||
|
@ -761,15 +761,15 @@ namespace
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, size_t elemSize1, int cn, DevMem2Db mask, bool colorMask, cudaStream_t stream);
|
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
|
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void set_to_gpu(DevMem2Db mat, const T* scalar, DevMem2Db mask, int channels, cudaStream_t stream);
|
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream);
|
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -787,9 +787,22 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
namespace cv { namespace gpu
|
||||||
{
|
{
|
||||||
CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
|
CV_EXPORTS void copyWithMask(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, const cv::gpu::GpuMat&, CUstream_st*);
|
||||||
|
CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&);
|
||||||
|
CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, double, double, CUstream_st*);
|
||||||
|
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*);
|
||||||
|
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*);
|
||||||
|
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar);
|
||||||
|
CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&);
|
||||||
|
}}
|
||||||
|
|
||||||
|
|
||||||
|
namespace cv { namespace gpu
|
||||||
|
{
|
||||||
|
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
CV_Assert(src.size() == dst.size() && src.type() == dst.type());
|
CV_Assert(src.size() == dst.size() && src.type() == dst.type());
|
||||||
CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
|
CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));
|
||||||
@ -797,17 +810,17 @@ namespace cv { namespace gpu
|
|||||||
cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
|
cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
|
void convertTo(const GpuMat& src, GpuMat& dst)
|
||||||
{
|
{
|
||||||
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
|
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
|
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
|
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
|
void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);
|
typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);
|
||||||
|
|
||||||
@ -820,7 +833,7 @@ namespace cv { namespace gpu
|
|||||||
callers[src.depth()](src, s, stream);
|
callers[src.depth()](src, s, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
|
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
|
typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -833,12 +846,12 @@ namespace cv { namespace gpu
|
|||||||
callers[src.depth()](src, s, mask, stream);
|
callers[src.depth()](src, s, mask, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void setTo(GpuMat& src, Scalar s)
|
void setTo(GpuMat& src, Scalar s)
|
||||||
{
|
{
|
||||||
setTo(src, s, 0);
|
setTo(src, s, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
CV_EXPORTS void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
|
void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
|
||||||
{
|
{
|
||||||
setTo(src, s, mask, 0);
|
setTo(src, s, mask, 0);
|
||||||
}
|
}
|
||||||
|
@ -5,24 +5,24 @@ Data Structures
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
gpu::DevMem2D\_
|
gpu::PtrStepSz
|
||||||
---------------
|
---------------
|
||||||
.. ocv:class:: gpu::DevMem2D\_
|
.. ocv:class:: gpu::PtrStepSz
|
||||||
|
|
||||||
Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
|
Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compiled code (CUDA kernels). Typically, it is used internally by OpenCV and by users who write device code. You can call its members from both host and device code. ::
|
||||||
|
|
||||||
template <typename T> struct DevMem2D_
|
template <typename T> struct PtrStepSz
|
||||||
{
|
{
|
||||||
int cols;
|
int cols;
|
||||||
int rows;
|
int rows;
|
||||||
T* data;
|
T* data;
|
||||||
size_t step;
|
size_t step;
|
||||||
|
|
||||||
DevMem2D_() : cols(0), rows(0), data(0), step(0){};
|
PtrStepSz() : cols(0), rows(0), data(0), step(0){};
|
||||||
DevMem2D_(int rows, int cols, T *data, size_t step);
|
PtrStepSz(int rows, int cols, T *data, size_t step);
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
explicit DevMem2D_(const DevMem2D_<U>& d);
|
explicit PtrStepSz(const PtrStepSz<U>& d);
|
||||||
|
|
||||||
typedef T elem_type;
|
typedef T elem_type;
|
||||||
enum { elem_size = sizeof(elem_type) };
|
enum { elem_size = sizeof(elem_type) };
|
||||||
@ -34,25 +34,25 @@ Lightweight class encapsulating pitched memory on a GPU and passed to nvcc-compi
|
|||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
|
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef DevMem2D_<unsigned char> DevMem2D;
|
typedef PtrStepSz<unsigned char> PtrStepSzb;
|
||||||
typedef DevMem2D_<float> DevMem2Df;
|
typedef PtrStepSz<float> PtrStepSzf;
|
||||||
typedef DevMem2D_<int> DevMem2Di;
|
typedef PtrStepSz<int> PtrStepSzi;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
gpu::PtrStep\_
|
gpu::PtrStep
|
||||||
--------------
|
--------------
|
||||||
.. ocv:class:: gpu::PtrStep\_
|
.. ocv:class:: gpu::PtrStep
|
||||||
|
|
||||||
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
|
Structure similar to :ocv:class:`gpu::PtrStepSz` but containing only a pointer and row step. Width and height fields are excluded due to performance reasons. The structure is intended for internal use or for users who write device code. ::
|
||||||
|
|
||||||
template<typename T> struct PtrStep_
|
template<typename T> struct PtrStep
|
||||||
{
|
{
|
||||||
T* data;
|
T* data;
|
||||||
size_t step;
|
size_t step;
|
||||||
|
|
||||||
PtrStep_();
|
PtrStep();
|
||||||
PtrStep_(const DevMem2D_<T>& mem);
|
PtrStep(const PtrStepSz<T>& mem);
|
||||||
|
|
||||||
typedef T elem_type;
|
typedef T elem_type;
|
||||||
enum { elem_size = sizeof(elem_type) };
|
enum { elem_size = sizeof(elem_type) };
|
||||||
@ -62,25 +62,9 @@ Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer a
|
|||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
|
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef PtrStep_<unsigned char> PtrStep;
|
typedef PtrStep<unsigned char> PtrStepb;
|
||||||
typedef PtrStep_<float> PtrStepf;
|
typedef PtrStep<float> PtrStepf;
|
||||||
typedef PtrStep_<int> PtrStepi;
|
typedef PtrStep<int> PtrStepi;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
gpu::PtrElemStep\_
|
|
||||||
------------------
|
|
||||||
.. ocv:class:: gpu::PtrElemStep\_
|
|
||||||
|
|
||||||
Structure similar to :ocv:class:`gpu::DevMem2D_` but containing only a pointer and a row step in elements. Width and height fields are excluded due to performance reasons. This class can only be constructed if ``sizeof(T)`` is a multiple of 256. The structure is intended for internal use or for users who write device code. ::
|
|
||||||
|
|
||||||
template<typename T> struct PtrElemStep_ : public PtrStep_<T>
|
|
||||||
{
|
|
||||||
PtrElemStep_(const DevMem2D_<T>& mem);
|
|
||||||
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0);
|
|
||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
gpu::GpuMat
|
gpu::GpuMat
|
||||||
@ -93,7 +77,7 @@ Base storage class for GPU memory with reference counting. Its interface matches
|
|||||||
* no functions that return references to their data (because references on GPU are not valid for CPU)
|
* no functions that return references to their data (because references on GPU are not valid for CPU)
|
||||||
* no expression templates technique support
|
* no expression templates technique support
|
||||||
|
|
||||||
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::DevMem2D_` and :ocv:class:`gpu::PtrStep_` so it can be passed directly to the kernel.
|
Beware that the latter limitation may lead to overloaded matrix operators that cause memory allocations. The ``GpuMat`` class is convertible to :ocv:class:`gpu::PtrStepSz` and :ocv:class:`gpu::PtrStep` so it can be passed directly to the kernel.
|
||||||
|
|
||||||
.. note:: In contrast with :ocv:class:`Mat`, in most cases ``GpuMat::isContinuous() == false`` . This means that rows are aligned to a size depending on the hardware. Single-row ``GpuMat`` is always a continuous matrix.
|
.. note:: In contrast with :ocv:class:`Mat`, in most cases ``GpuMat::isContinuous() == false`` . This means that rows are aligned to a size depending on the hardware. Single-row ``GpuMat`` is always a continuous matrix.
|
||||||
|
|
||||||
@ -113,10 +97,10 @@ Beware that the latter limitation may lead to overloaded matrix operators that c
|
|||||||
//! builds GpuMat from Mat. Blocks uploading to device.
|
//! builds GpuMat from Mat. Blocks uploading to device.
|
||||||
explicit GpuMat (const Mat& m);
|
explicit GpuMat (const Mat& m);
|
||||||
|
|
||||||
//! returns lightweight DevMem2D_ structure for passing
|
//! returns lightweight PtrStepSz structure for passing
|
||||||
//to nvcc-compiled code. Contains size, data ptr and step.
|
//to nvcc-compiled code. Contains size, data ptr and step.
|
||||||
template <class T> operator DevMem2D_<T>() const;
|
template <class T> operator PtrStepSz<T>() const;
|
||||||
template <class T> operator PtrStep_<T>() const;
|
template <class T> operator PtrStep<T>() const;
|
||||||
|
|
||||||
//! blocks uploading data to GpuMat.
|
//! blocks uploading data to GpuMat.
|
||||||
void upload(const cv::Mat& m);
|
void upload(const cv::Mat& m);
|
||||||
|
@ -40,4 +40,4 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "opencv2/core/devmem2d.hpp"
|
#include "opencv2/core/cuda_devptrs.hpp"
|
||||||
|
@ -454,8 +454,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace mathfunc
|
namespace mathfunc
|
||||||
{
|
{
|
||||||
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
|
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
|
||||||
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
|
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
float decisionThreshold, int maxFeatures, int numInitializationFrames);
|
float decisionThreshold, int maxFeatures, int numInitializationFrames);
|
||||||
|
|
||||||
template <typename SrcT>
|
template <typename SrcT>
|
||||||
void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
|
void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
|
||||||
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -109,7 +109,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::bgfg_gmg;
|
using namespace cv::gpu::device::bgfg_gmg;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
|
typedef void (*func_t)(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
|
||||||
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
|
@ -62,14 +62,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace mog
|
namespace mog
|
||||||
{
|
{
|
||||||
void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var,
|
void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
|
||||||
int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma,
|
int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
void getBackgroundImage_gpu(int cn, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
|
void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
|
||||||
|
|
||||||
void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
|
void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal);
|
||||||
void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
|
void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
|
||||||
void getBackgroundImage2_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream);
|
void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -57,9 +57,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
|
void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);
|
||||||
|
|
||||||
void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_<unsigned int> randStates, cudaStream_t stream);
|
void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
|
||||||
|
|
||||||
void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<unsigned int> randStates, cudaStream_t stream);
|
void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<unsigned int> randStates, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -59,10 +59,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace bilateral_filter
|
namespace bilateral_filter
|
||||||
{
|
{
|
||||||
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
|
void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);
|
||||||
|
|
||||||
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
|
void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
|
||||||
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
|
void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -120,7 +120,7 @@ namespace
|
|||||||
disp.copyTo(dst);
|
disp.copyTo(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
bilateral_filter_gpu((PtrStepSz<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
||||||
|
@ -86,72 +86,72 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace bf_match
|
namespace bf_match
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace bf_knnmatch
|
namespace bf_knnmatch
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace bf_radius_match
|
namespace bf_radius_match
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -200,8 +200,8 @@ void cv::gpu::BFMatcher_GPU::matchSingle(const GpuMat& query, const GpuMat& trai
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_match;
|
using namespace cv::gpu::device::bf_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -301,9 +301,9 @@ void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat&
|
|||||||
|
|
||||||
if (masks.empty())
|
if (masks.empty())
|
||||||
{
|
{
|
||||||
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(DevMem2Db)));
|
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
|
||||||
|
|
||||||
DevMem2Db* trainCollectionCPU_ptr = trainCollectionCPU.ptr<DevMem2Db>();
|
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
|
||||||
|
|
||||||
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
|
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
|
||||||
*trainCollectionCPU_ptr = trainDescCollection[i];
|
*trainCollectionCPU_ptr = trainDescCollection[i];
|
||||||
@ -315,10 +315,10 @@ void cv::gpu::BFMatcher_GPU::makeGpuCollection(GpuMat& trainCollection, GpuMat&
|
|||||||
{
|
{
|
||||||
CV_Assert(masks.size() == trainDescCollection.size());
|
CV_Assert(masks.size() == trainDescCollection.size());
|
||||||
|
|
||||||
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(DevMem2Db)));
|
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
|
||||||
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
|
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
|
||||||
|
|
||||||
DevMem2Db* trainCollectionCPU_ptr = trainCollectionCPU.ptr<DevMem2Db>();
|
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
|
||||||
PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
|
PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
|
||||||
|
|
||||||
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
|
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
|
||||||
@ -346,8 +346,8 @@ void cv::gpu::BFMatcher_GPU::matchCollection(const GpuMat& query, const GpuMat&
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_match;
|
using namespace cv::gpu::device::bf_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -460,8 +460,8 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_knnmatch;
|
using namespace cv::gpu::device::bf_knnmatch;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -592,8 +592,8 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_knnmatch;
|
using namespace cv::gpu::device::bf_knnmatch;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -776,8 +776,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_radius_match;
|
using namespace cv::gpu::device::bf_radius_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -911,8 +911,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
|
|||||||
|
|
||||||
using namespace cv::gpu::device::bf_radius_match;
|
using namespace cv::gpu::device::bf_radius_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callersL1[] =
|
static const caller_t callersL1[] =
|
||||||
@ -964,8 +964,8 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
|
|||||||
caller_t func = callers[query.depth()];
|
caller_t func = callers[query.depth()];
|
||||||
CV_Assert(func != 0);
|
CV_Assert(func != 0);
|
||||||
|
|
||||||
vector<DevMem2Db> trains_(trainDescCollection.begin(), trainDescCollection.end());
|
vector<PtrStepSzb> trains_(trainDescCollection.begin(), trainDescCollection.end());
|
||||||
vector<DevMem2Db> masks_(masks.begin(), masks.end());
|
vector<PtrStepSzb> masks_(masks.begin(), masks.end());
|
||||||
|
|
||||||
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
|
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
|
||||||
trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
|
trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
|
||||||
|
@ -60,12 +60,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace transform_points
|
namespace transform_points
|
||||||
{
|
{
|
||||||
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
|
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace project_points
|
namespace project_points
|
||||||
{
|
{
|
||||||
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
|
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace solve_pnp_ransac
|
namespace solve_pnp_ransac
|
||||||
|
@ -352,18 +352,18 @@ namespace cv { namespace gpu { namespace device
|
|||||||
float initalScale,
|
float initalScale,
|
||||||
float factor,
|
float factor,
|
||||||
int total,
|
int total,
|
||||||
const DevMem2Db& mstages,
|
const PtrStepSzb& mstages,
|
||||||
const int nstages,
|
const int nstages,
|
||||||
const DevMem2Di& mnodes,
|
const PtrStepSzi& mnodes,
|
||||||
const DevMem2Df& mleaves,
|
const PtrStepSzf& mleaves,
|
||||||
const DevMem2Di& msubsets,
|
const PtrStepSzi& msubsets,
|
||||||
const DevMem2Db& mfeatures,
|
const PtrStepSzb& mfeatures,
|
||||||
const int subsetSize,
|
const int subsetSize,
|
||||||
DevMem2D_<int4> objects,
|
PtrStepSz<int4> objects,
|
||||||
unsigned int* classified,
|
unsigned int* classified,
|
||||||
DevMem2Di integral);
|
PtrStepSzi integral);
|
||||||
|
|
||||||
void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
|
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -59,9 +59,9 @@ namespace cv { namespace gpu {
|
|||||||
namespace device
|
namespace device
|
||||||
{
|
{
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ using namespace ::cv::gpu::device;
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
typedef void (*gpu_func_t)(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*gpu_func_t)(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
|
|
||||||
void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
|
void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream)
|
||||||
{
|
{
|
||||||
@ -1336,7 +1336,7 @@ namespace
|
|||||||
|
|
||||||
void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream)
|
void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
static const func_t funcs[3][4] =
|
static const func_t funcs[3][4] =
|
||||||
{
|
{
|
||||||
{0,0,Bayer2BGR_8u_gpu<3>, Bayer2BGR_8u_gpu<4>},
|
{0,0,Bayer2BGR_8u_gpu<3>, Bayer2BGR_8u_gpu<4>},
|
||||||
|
@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
|
dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
void NV12ToARGB_gpu(const PtrStepb decodedFrame, DevMem2D_<uint> interopFrame, cudaStream_t stream)
|
void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
|
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
|
||||||
|
@ -209,7 +209,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match Unrolled Cached
|
// Match Unrolled Cached
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
||||||
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
|
__device__ void loadQueryToSmem(int queryIdx, const PtrStepSz<T>& query, U* s_query)
|
||||||
{
|
{
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
||||||
@ -220,7 +220,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolledCached(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
@ -281,7 +281,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void matchUnrolledCached(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -314,8 +314,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolledCached(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -331,7 +331,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void matchUnrolledCached(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -353,7 +353,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
||||||
}
|
}
|
||||||
@ -375,8 +375,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolledCached(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<int2>& imgIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -395,7 +395,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match Unrolled
|
// Match Unrolled
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolled(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
@ -460,7 +460,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void matchUnrolled(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -491,8 +491,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -508,7 +508,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void matchUnrolled(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
||||||
}
|
}
|
||||||
@ -550,8 +550,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<int2>& imgIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -570,7 +570,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match
|
// Match
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loop(int queryIdx, const PtrStepSz<T>& query, int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance1, float& bestDistance2,
|
float& bestDistance1, float& bestDistance2,
|
||||||
int& bestTrainIdx1, int& bestTrainIdx2,
|
int& bestTrainIdx1, int& bestTrainIdx2,
|
||||||
@ -634,7 +634,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
__global__ void match(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int2* bestTrainIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -665,8 +665,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -682,7 +682,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
__global__ void match(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask, int2* bestTrainIdx, int2* bestImgIdx, float2* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -702,7 +702,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loop<BLOCK_SIZE, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
loop<BLOCK_SIZE, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance1, myBestDistance2, myBestTrainIdx1, myBestTrainIdx2, myBestImgIdx1, myBestImgIdx2);
|
||||||
}
|
}
|
||||||
@ -724,8 +724,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2D_<int2>& trainIdx, const DevMem2D_<int2>& imgIdx, const DevMem2D_<float2>& distance,
|
const PtrStepSz<int2>& trainIdx, const PtrStepSz<int2>& imgIdx, const PtrStepSz<float2>& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -744,66 +744,66 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// knnMatch 2 dispatcher
|
// knnMatch 2 dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void match2Dispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 64, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 128)
|
else if (query.cols <= 128)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 128, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 128, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
/*else if (query.cols <= 256)
|
/*else if (query.cols <= 256)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 512)
|
else if (query.cols <= 512)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 1024)
|
else if (query.cols <= 1024)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}*/
|
}*/
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
match<16, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
match<16, Dist>(query, train, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void match2Dispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void match2Dispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 64, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 128)
|
else if (query.cols <= 128)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 128, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolledCached<16, 128, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
/*else if (query.cols <= 256)
|
/*else if (query.cols <= 256)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 256, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 512)
|
else if (query.cols <= 512)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 512, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
else if (query.cols <= 1024)
|
else if (query.cols <= 1024)
|
||||||
{
|
{
|
||||||
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}*/
|
}*/
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
match<16, Dist>(query, trains, n, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<int2> >(imgIdx), static_cast< DevMem2D_<float2> > (distance), stream);
|
match<16, Dist>(query, trains, n, mask, static_cast< PtrStepSz<int2> >(trainIdx), static_cast< PtrStepSz<int2> >(imgIdx), static_cast< PtrStepSz<float2> > (distance), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -811,7 +811,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Calc distance kernel
|
// Calc distance kernel
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void calcDistanceUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, PtrStepf allDist)
|
__global__ void calcDistanceUnrolled(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, PtrStepf allDist)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -860,7 +860,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void calcDistanceUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
|
void calcDistanceUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask, const PtrStepSzf& allDist, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
||||||
@ -875,7 +875,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void calcDistance(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, PtrStepf allDist)
|
__global__ void calcDistance(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, PtrStepf allDist)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -923,7 +923,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void calcDistance(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask, const DevMem2Df& allDist, cudaStream_t stream)
|
void calcDistance(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask, const PtrStepSzf& allDist, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
||||||
@ -941,8 +941,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Calc Distance dispatcher
|
// Calc Distance dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void calcDistanceDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void calcDistanceDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Df& allDist,
|
const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
@ -976,7 +976,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// find knn match kernel
|
// find knn match kernel
|
||||||
|
|
||||||
template <int BLOCK_SIZE>
|
template <int BLOCK_SIZE>
|
||||||
__global__ void findBestMatch(DevMem2Df allDist, int i, PtrStepi trainIdx, PtrStepf distance)
|
__global__ void findBestMatch(PtrStepSzf allDist, int i, PtrStepi trainIdx, PtrStepf distance)
|
||||||
{
|
{
|
||||||
const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
|
const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
|
||||||
__shared__ float s_dist[SMEM_SIZE];
|
__shared__ float s_dist[SMEM_SIZE];
|
||||||
@ -1017,7 +1017,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE>
|
template <int BLOCK_SIZE>
|
||||||
void findKnnMatch(int k, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist, cudaStream_t stream)
|
void findKnnMatch(int k, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSzf& allDist, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, 1, 1);
|
const dim3 block(BLOCK_SIZE, 1, 1);
|
||||||
const dim3 grid(trainIdx.rows, 1, 1);
|
const dim3 grid(trainIdx.rows, 1, 1);
|
||||||
@ -1032,17 +1032,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void findKnnMatchDispatcher(int k, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream)
|
void findKnnMatchDispatcher(int k, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
findKnnMatch<256>(k, static_cast<DevMem2Di>(trainIdx), static_cast<DevMem2Df>(distance), allDist, stream);
|
findKnnMatch<256>(k, static_cast<PtrStepSzi>(trainIdx), static_cast<PtrStepSzf>(distance), allDist, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// knn match Dispatcher
|
// knn match Dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, int k, const Mask& mask,
|
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, int k, const Mask& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (k == 2)
|
if (k == 2)
|
||||||
@ -1059,104 +1059,104 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// knn match caller
|
// knn match caller
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
||||||
else
|
else
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
||||||
else
|
else
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, SingleMask(mask), trainIdx, distance, allDist, cc, stream);
|
||||||
else
|
else
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), k, WithOutMask(), trainIdx, distance, allDist, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, int k, const DevMem2Db& mask, const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, int k, const PtrStepSzb& mask, const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
match2Dispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
||||||
else
|
else
|
||||||
match2Dispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void match2L1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2L1_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2L1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2L1_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2L1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2L1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
match2Dispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
||||||
else
|
else
|
||||||
match2Dispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void match2L2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2L2_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2L2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2L2_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2L2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Di& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2L2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2L2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2L2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
match2Dispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data), trainIdx, imgIdx, distance, cc, stream);
|
||||||
else
|
else
|
||||||
match2Dispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
match2Dispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(), trainIdx, imgIdx, distance, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void match2Hamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2Hamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2Hamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2Hamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2Hamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2Hamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2Hamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2Hamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
template void match2Hamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2Hamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, int cc, cudaStream_t stream);
|
||||||
} // namespace bf_knnmatch
|
} // namespace bf_knnmatch
|
||||||
}}} // namespace cv { namespace gpu { namespace device {
|
}}} // namespace cv { namespace gpu { namespace device {
|
||||||
|
@ -86,7 +86,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match Unrolled Cached
|
// Match Unrolled Cached
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T, typename U>
|
||||||
__device__ void loadQueryToSmem(int queryIdx, const DevMem2D_<T>& query, U* s_query)
|
__device__ void loadQueryToSmem(int queryIdx, const PtrStepSz<T>& query, U* s_query)
|
||||||
{
|
{
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
||||||
@ -97,7 +97,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolledCached(int queryIdx, const DevMem2D_<T>& query,volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolledCached(int queryIdx, const PtrStepSz<T>& query,volatile int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
||||||
{
|
{
|
||||||
@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
__global__ void matchUnrolledCached(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -174,8 +174,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolledCached(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -191,7 +191,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolledCached(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
|
__global__ void matchUnrolledCached(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask,
|
||||||
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
@ -211,7 +211,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
loopUnrolledCached<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
||||||
}
|
}
|
||||||
@ -233,8 +233,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolledCached(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolledCached(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -253,7 +253,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match Unrolled
|
// Match Unrolled
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loopUnrolled(int queryIdx, const DevMem2D_<T>& query,volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loopUnrolled(int queryIdx, const PtrStepSz<T>& query,volatile int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
||||||
{
|
{
|
||||||
@ -303,7 +303,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
__global__ void matchUnrolled(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -349,7 +349,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
|
__global__ void matchUnrolled(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask,
|
||||||
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
@ -367,7 +367,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
loopUnrolled<BLOCK_SIZE, MAX_DESC_LEN, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
||||||
}
|
}
|
||||||
@ -389,8 +389,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -409,7 +409,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match
|
// Match
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__device__ void loop(int queryIdx, const DevMem2D_<T>& query, volatile int imgIdx, const DevMem2D_<T>& train, const Mask& mask,
|
__device__ void loop(int queryIdx, const PtrStepSz<T>& query, volatile int imgIdx, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
typename Dist::value_type* s_query, typename Dist::value_type* s_train,
|
||||||
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
float& bestDistance, int& bestTrainIdx, int& bestImgIdx)
|
||||||
{
|
{
|
||||||
@ -458,7 +458,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
__global__ void match(const PtrStepSz<T> query, const PtrStepSz<T> train, const Mask mask, int* bestTrainIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
|
|
||||||
@ -487,8 +487,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -504,7 +504,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, const DevMem2D_<T>* trains, int n, const Mask mask,
|
__global__ void match(const PtrStepSz<T> query, const PtrStepSz<T>* trains, int n, const Mask mask,
|
||||||
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
int* bestTrainIdx, int* bestImgIdx, float* bestDistance)
|
||||||
{
|
{
|
||||||
extern __shared__ int smem[];
|
extern __shared__ int smem[];
|
||||||
@ -521,7 +521,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
Mask m = mask;
|
Mask m = mask;
|
||||||
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
for (int imgIdx = 0; imgIdx < n; ++imgIdx)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[imgIdx];
|
const PtrStepSz<T> train = trains[imgIdx];
|
||||||
m.next();
|
m.next();
|
||||||
loop<BLOCK_SIZE, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
loop<BLOCK_SIZE, Dist>(queryIdx, query, imgIdx, train, m, s_query, s_train, myBestDistance, myBestTrainIdx, myBestImgIdx);
|
||||||
}
|
}
|
||||||
@ -543,8 +543,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -563,8 +563,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match dispatcher
|
// Match dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, const Mask& mask,
|
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
@ -595,8 +595,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, const Mask& mask,
|
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
@ -629,152 +629,152 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Match caller
|
// Match caller
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), SingleMask(mask),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), WithOutMask(),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), SingleMask(mask),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), WithOutMask(),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), SingleMask(mask),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), SingleMask(mask),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), WithOutMask(),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), WithOutMask(),
|
||||||
trainIdx, distance,
|
trainIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& maskCollection, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& maskCollection, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (masks.data)
|
if (masks.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, MaskCollection(masks.data),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains.ptr(), trains.cols, WithOutMask(),
|
||||||
trainIdx, imgIdx, distance,
|
trainIdx, imgIdx, distance,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, int cc, cudaStream_t stream);
|
||||||
} // namespace bf_match
|
} // namespace bf_match
|
||||||
}}} // namespace cv { namespace gpu { namespace device {
|
}}} // namespace cv { namespace gpu { namespace device {
|
||||||
|
@ -53,7 +53,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match Unrolled
|
// Match Unrolled
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
|
||||||
__global__ void matchUnrolled(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
|
__global__ void matchUnrolled(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
|
||||||
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
|
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
||||||
@ -113,8 +113,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T, typename Mask>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, cudaStream_t stream)
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
||||||
@ -130,8 +130,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
|
template <int BLOCK_SIZE, int MAX_DESC_LEN, typename Dist, typename T>
|
||||||
void matchUnrolled(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
|
void matchUnrolled(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[i];
|
const PtrStepSz<T> train = trains[i];
|
||||||
|
|
||||||
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match
|
// Match
|
||||||
|
|
||||||
template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, bool SAVE_IMG_IDX, typename Dist, typename T, typename Mask>
|
||||||
__global__ void match(const DevMem2D_<T> query, int imgIdx, const DevMem2D_<T> train, float maxDistance, const Mask mask,
|
__global__ void match(const PtrStepSz<T> query, int imgIdx, const PtrStepSz<T> train, float maxDistance, const Mask mask,
|
||||||
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
|
PtrStepi bestTrainIdx, PtrStepi bestImgIdx, PtrStepf bestDistance, unsigned int* nMatches, int maxCount)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
||||||
@ -224,8 +224,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
template <int BLOCK_SIZE, typename Dist, typename T, typename Mask>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -242,8 +242,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int BLOCK_SIZE, typename Dist, typename T>
|
template <int BLOCK_SIZE, typename Dist, typename T>
|
||||||
void match(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
|
void match(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
|
||||||
@ -252,7 +252,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
const DevMem2D_<T> train = trains[i];
|
const PtrStepSz<T> train = trains[i];
|
||||||
|
|
||||||
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
const dim3 grid(divUp(train.rows, BLOCK_SIZE), divUp(query.rows, BLOCK_SIZE));
|
||||||
|
|
||||||
@ -277,8 +277,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Match dispatcher
|
// Match dispatcher
|
||||||
|
|
||||||
template <typename Dist, typename T, typename Mask>
|
template <typename Dist, typename T, typename Mask>
|
||||||
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>& train, float maxDistance, const Mask& mask,
|
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>& train, float maxDistance, const Mask& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
@ -309,8 +309,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Dist, typename T>
|
template <typename Dist, typename T>
|
||||||
void matchDispatcher(const DevMem2D_<T>& query, const DevMem2D_<T>* trains, int n, float maxDistance, const DevMem2Db* masks,
|
void matchDispatcher(const PtrStepSz<T>& query, const PtrStepSz<T>* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)cc;
|
(void)cc;
|
||||||
@ -343,125 +343,125 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Radius Match caller
|
// Radius Match caller
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, SingleMask(mask),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, SingleMask(mask),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), static_cast< DevMem2D_<T> >(train), maxDistance, WithOutMask(),
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), static_cast< PtrStepSz<T> >(train), maxDistance, WithOutMask(),
|
||||||
trainIdx, distance, nMatches,
|
trainIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& queryDescs, const DevMem2Db& trainDescs, float maxDistance, const DevMem2Db& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const PtrStepSzb& queryDescs, const PtrStepSzb& trainDescs, float maxDistance, const PtrStepSzb& mask, const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
matchDispatcher< L1Dist<T> >(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
|
matchDispatcher< L1Dist<T> >(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
|
||||||
trainIdx, imgIdx, distance, nMatches,
|
trainIdx, imgIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchL1_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL1_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL1_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL1_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL1_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
matchDispatcher<L2Dist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
|
matchDispatcher<L2Dist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
|
||||||
trainIdx, imgIdx, distance, nMatches,
|
trainIdx, imgIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void matchL2_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchL2_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchL2_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchL2_gpu<float >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchL2_gpu<float >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
matchDispatcher<HammingDist>(static_cast< DevMem2D_<T> >(query), (const DevMem2D_<T>*)trains, n, maxDistance, masks,
|
matchDispatcher<HammingDist>(static_cast< PtrStepSz<T> >(query), (const PtrStepSz<T>*)trains, n, maxDistance, masks,
|
||||||
trainIdx, imgIdx, distance, nMatches,
|
trainIdx, imgIdx, distance, nMatches,
|
||||||
cc, stream);
|
cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void matchHamming_gpu<uchar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<uchar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<schar >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<schar >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
} // namespace bf_radius_match
|
} // namespace bf_radius_match
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -168,7 +168,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
template <typename T> struct Quantization : detail::Quantization<VecTraits<T>::cn> {};
|
template <typename T> struct Quantization : detail::Quantization<VecTraits<T>::cn> {};
|
||||||
|
|
||||||
template <typename SrcT>
|
template <typename SrcT>
|
||||||
__global__ void update(const PtrStep_<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
|
__global__ void update(const PtrStep<SrcT> frame, PtrStepb fgmask, PtrStepi colors_, PtrStepf weights_, PtrStepi nfeatures_,
|
||||||
const int frameNum, const float learningRate, const bool updateBackgroundModel)
|
const int frameNum, const float learningRate, const bool updateBackgroundModel)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -222,7 +222,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT>
|
template <typename SrcT>
|
||||||
void update_gpu(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures,
|
void update_gpu(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures,
|
||||||
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream)
|
int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(32, 8);
|
const dim3 block(32, 8);
|
||||||
@ -230,7 +230,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
update<SrcT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
|
update<SrcT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, colors, weights, nfeatures, frameNum, learningRate, updateBackgroundModel);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
@ -238,16 +238,16 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void update_gpu<uchar >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<uchar >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<uchar3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<uchar3 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<uchar4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<uchar4 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
|
|
||||||
template void update_gpu<ushort >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<ushort >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<ushort3>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<ushort3>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<ushort4>(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<ushort4>(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
|
|
||||||
template void update_gpu<float >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<float >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<float3 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<float3 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
template void update_gpu<float4 >(DevMem2Db frame, PtrStepb fgmask, DevMem2Di colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
template void update_gpu<float4 >(PtrStepSzb frame, PtrStepb fgmask, PtrStepSzi colors, PtrStepf weights, PtrStepi nfeatures, int frameNum, float learningRate, bool updateBackgroundModel, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
@ -121,8 +121,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// MOG without learning
|
// MOG without learning
|
||||||
|
|
||||||
template <typename SrcT, typename WorkT>
|
template <typename SrcT, typename WorkT>
|
||||||
__global__ void mog_withoutLearning(const DevMem2D_<SrcT> frame, PtrStepb fgmask,
|
__global__ void mog_withoutLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
|
||||||
const PtrStepf gmm_weight, const PtrStep_<WorkT> gmm_mean, const PtrStep_<WorkT> gmm_var,
|
const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, const PtrStep<WorkT> gmm_var,
|
||||||
const int nmixtures, const float varThreshold, const float backgroundRatio)
|
const int nmixtures, const float varThreshold, const float backgroundRatio)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -172,7 +172,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename WorkT>
|
template <typename SrcT, typename WorkT>
|
||||||
void mog_withoutLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var,
|
void mog_withoutLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var,
|
||||||
int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
|
int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
@ -180,8 +180,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(mog_withoutLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask,
|
mog_withoutLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
|
||||||
weight, (DevMem2D_<WorkT>) mean, (DevMem2D_<WorkT>) var,
|
weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
|
||||||
nmixtures, varThreshold, backgroundRatio);
|
nmixtures, varThreshold, backgroundRatio);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
@ -194,8 +194,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// MOG with learning
|
// MOG with learning
|
||||||
|
|
||||||
template <typename SrcT, typename WorkT>
|
template <typename SrcT, typename WorkT>
|
||||||
__global__ void mog_withLearning(const DevMem2D_<SrcT> frame, PtrStepb fgmask,
|
__global__ void mog_withLearning(const PtrStepSz<SrcT> frame, PtrStepb fgmask,
|
||||||
PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep_<WorkT> gmm_mean, PtrStep_<WorkT> gmm_var,
|
PtrStepf gmm_weight, PtrStepf gmm_sortKey, PtrStep<WorkT> gmm_mean, PtrStep<WorkT> gmm_var,
|
||||||
const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
|
const int nmixtures, const float varThreshold, const float backgroundRatio, const float learningRate, const float minVar)
|
||||||
{
|
{
|
||||||
const float w0 = 0.05f;
|
const float w0 = 0.05f;
|
||||||
@ -324,7 +324,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename WorkT>
|
template <typename SrcT, typename WorkT>
|
||||||
void mog_withLearning_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var,
|
void mog_withLearning_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var,
|
||||||
int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
|
int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@ -333,8 +333,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(mog_withLearning<SrcT, WorkT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask,
|
mog_withLearning<SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask,
|
||||||
weight, sortKey, (DevMem2D_<WorkT>) mean, (DevMem2D_<WorkT>) var,
|
weight, sortKey, (PtrStepSz<WorkT>) mean, (PtrStepSz<WorkT>) var,
|
||||||
nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
|
nmixtures, varThreshold, backgroundRatio, learningRate, minVar);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
@ -346,10 +346,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// MOG
|
// MOG
|
||||||
|
|
||||||
void mog_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
|
void mog_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*withoutLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
|
typedef void (*withoutLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, cudaStream_t stream);
|
||||||
typedef void (*withLearning_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Df weight, DevMem2Df sortKey, DevMem2Db mean, DevMem2Db var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);
|
typedef void (*withLearning_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzf weight, PtrStepSzf sortKey, PtrStepSzb mean, PtrStepSzb var, int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar, cudaStream_t stream);
|
||||||
|
|
||||||
static const withoutLearning_t withoutLearning[] =
|
static const withoutLearning_t withoutLearning[] =
|
||||||
{
|
{
|
||||||
@ -369,7 +369,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename WorkT, typename OutT>
|
template <typename WorkT, typename OutT>
|
||||||
__global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep_<WorkT> gmm_mean, DevMem2D_<OutT> dst, const int nmixtures, const float backgroundRatio)
|
__global__ void getBackgroundImage(const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStepSz<OutT> dst, const int nmixtures, const float backgroundRatio)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -399,23 +399,23 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename WorkT, typename OutT>
|
template <typename WorkT, typename OutT>
|
||||||
void getBackgroundImage_caller(DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
|
void getBackgroundImage_caller(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage<WorkT, OutT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (DevMem2D_<WorkT>) mean, (DevMem2D_<OutT>) dst, nmixtures, backgroundRatio);
|
getBackgroundImage<WorkT, OutT><<<grid, block, 0, stream>>>(weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst, nmixtures, backgroundRatio);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void getBackgroundImage_gpu(int cn, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
|
void getBackgroundImage_gpu(int cn, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, int nmixtures, float backgroundRatio, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
@ -455,8 +455,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool detectShadows, typename SrcT, typename WorkT>
|
template <bool detectShadows, typename SrcT, typename WorkT>
|
||||||
__global__ void mog2(const DevMem2D_<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
|
__global__ void mog2(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStepb modesUsed,
|
||||||
PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep_<WorkT> gmm_mean,
|
PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep<WorkT> gmm_mean,
|
||||||
const float alphaT, const float alpha1, const float prune)
|
const float alphaT, const float alpha1, const float prune)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -653,7 +653,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename WorkT>
|
template <typename SrcT, typename WorkT>
|
||||||
void mog2_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean,
|
void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
|
||||||
float alphaT, float prune, bool detectShadows, cudaStream_t stream)
|
float alphaT, float prune, bool detectShadows, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
@ -665,16 +665,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(mog2<true, SrcT, WorkT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, modesUsed,
|
mog2<true, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
|
||||||
weight, variance, (DevMem2D_<WorkT>) mean,
|
weight, variance, (PtrStepSz<WorkT>) mean,
|
||||||
alphaT, alpha1, prune);
|
alphaT, alpha1, prune);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(mog2<false, SrcT, WorkT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, modesUsed,
|
mog2<false, SrcT, WorkT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, modesUsed,
|
||||||
weight, variance, (DevMem2D_<WorkT>) mean,
|
weight, variance, (PtrStepSz<WorkT>) mean,
|
||||||
alphaT, alpha1, prune);
|
alphaT, alpha1, prune);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -684,10 +684,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void mog2_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean,
|
void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean,
|
||||||
float alphaT, float prune, bool detectShadows, cudaStream_t stream)
|
float alphaT, float prune, bool detectShadows, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Df variance, DevMem2Db mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
@ -698,7 +698,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename WorkT, typename OutT>
|
template <typename WorkT, typename OutT>
|
||||||
__global__ void getBackgroundImage2(const DevMem2Db modesUsed, const PtrStepf gmm_weight, const PtrStep_<WorkT> gmm_mean, PtrStep_<OutT> dst)
|
__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep<WorkT> gmm_mean, PtrStep<OutT> dst)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -730,23 +730,23 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename WorkT, typename OutT>
|
template <typename WorkT, typename OutT>
|
||||||
void getBackgroundImage2_caller(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream)
|
void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
|
dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2<WorkT, OutT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (DevMem2D_<WorkT>) mean, (DevMem2D_<OutT>) dst);
|
getBackgroundImage2<WorkT, OutT><<<grid, block, 0, stream>>>(modesUsed, weight, (PtrStepSz<WorkT>) mean, (PtrStepSz<OutT>) dst);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void getBackgroundImage2_gpu(int cn, DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream)
|
void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db modesUsed, DevMem2Df weight, DevMem2Db mean, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
|
@ -90,7 +90,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename SampleT>
|
template <typename SrcT, typename SampleT>
|
||||||
__global__ void init(const DevMem2D_<SrcT> frame, PtrStep_<SampleT> samples, PtrStep_<uint> randStates)
|
__global__ void init(const PtrStepSz<SrcT> frame, PtrStep<SampleT> samples, PtrStep<uint> randStates)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -116,23 +116,23 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename SampleT>
|
template <typename SrcT, typename SampleT>
|
||||||
void init_caller(DevMem2Db frame, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
|
void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
|
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
init<SrcT, SampleT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, (DevMem2D_<SampleT>) samples, randStates);
|
init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_gpu(DevMem2Db frame, int cn, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
|
void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db frame, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
|
0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
|
||||||
@ -155,7 +155,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename SampleT>
|
template <typename SrcT, typename SampleT>
|
||||||
__global__ void update(const DevMem2D_<SrcT> frame, PtrStepb fgmask, PtrStep_<SampleT> samples, PtrStep_<uint> randStates)
|
__global__ void update(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStep<SampleT> samples, PtrStep<uint> randStates)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -225,23 +225,23 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename SrcT, typename SampleT>
|
template <typename SrcT, typename SampleT>
|
||||||
void update_caller(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
|
void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
|
dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
update<SrcT, SampleT><<<grid, block, 0, stream>>>((DevMem2D_<SrcT>) frame, fgmask, (DevMem2D_<SampleT>) samples, randStates);
|
update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_gpu(DevMem2Db frame, int cn, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream)
|
void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2Db frame, DevMem2Db fgmask, DevMem2Db samples, DevMem2D_<uint> randStates, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
|
0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
|
||||||
|
@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
__constant__ short cedge_disc;
|
__constant__ short cedge_disc;
|
||||||
__constant__ short cmax_disc;
|
__constant__ short cmax_disc;
|
||||||
|
|
||||||
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
void load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
|
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
|
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
|
||||||
@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_caller(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -213,12 +213,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(PtrStepSzb disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
|
@ -66,8 +66,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void call(const DevMem2D_<float3> src, const float* rot,
|
void call(const PtrStepSz<float3> src, const float* rot,
|
||||||
const float* transl, DevMem2D_<float3> dst,
|
const float* transl, PtrStepSz<float3> dst,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||||
@ -103,8 +103,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void call(const DevMem2D_<float3> src, const float* rot,
|
void call(const PtrStepSz<float3> src, const float* rot,
|
||||||
const float* transl, const float* proj, DevMem2D_<float2> dst,
|
const float* transl, const float* proj, PtrStepSz<float2> dst,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||||
|
@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template<typename T, typename F>
|
template<typename T, typename F>
|
||||||
__global__ void computeConnectivity(const DevMem2D_<T> image, DevMem2D components, F connected)
|
__global__ void computeConnectivity(const PtrStepSz<T> image, PtrStepSzb components, F connected)
|
||||||
{
|
{
|
||||||
int x = threadIdx.x + blockIdx.x * blockDim.x;
|
int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
int y = threadIdx.y + blockIdx.y * blockDim.y;
|
int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||||
@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template< typename T>
|
template< typename T>
|
||||||
void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream)
|
void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
|
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
|
||||||
dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y));
|
dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y));
|
||||||
@ -210,23 +210,23 @@ namespace cv { namespace gpu { namespace device
|
|||||||
typedef InInterval<typename IntervalsTraits<T>::dist_type, IntervalsTraits<T>::ch> Int_t;
|
typedef InInterval<typename IntervalsTraits<T>::dist_type, IntervalsTraits<T>::ch> Int_t;
|
||||||
|
|
||||||
Int_t inInt(lo, hi);
|
Int_t inInt(lo, hi);
|
||||||
computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const DevMem2D_<T> >(image), edges, inInt);
|
computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const PtrStepSz<T> >(image), edges, inInt);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void computeEdges<uchar> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<uchar> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<uchar3> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<uchar3> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<uchar4> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<uchar4> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<ushort> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<ushort> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<ushort3>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<ushort3>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<ushort4>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<ushort4>(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<int> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<int> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
template void computeEdges<float> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
template void computeEdges<float> (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
|
|
||||||
__global__ void lableTiles(const DevMem2D edges, DevMem2Di comps)
|
__global__ void lableTiles(const PtrStepSzb edges, PtrStepSzi comps)
|
||||||
{
|
{
|
||||||
int x = threadIdx.x + blockIdx.x * TILE_COLS;
|
int x = threadIdx.x + blockIdx.x * TILE_COLS;
|
||||||
int y = threadIdx.y + blockIdx.y * TILE_ROWS;
|
int y = threadIdx.y + blockIdx.y * TILE_ROWS;
|
||||||
@ -360,7 +360,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ __forceinline__ int root(const DevMem2Di& comps, int label)
|
__device__ __forceinline__ int root(const PtrStepSzi& comps, int label)
|
||||||
{
|
{
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
@ -376,7 +376,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return label;
|
return label;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ __forceinline__ void isConnected(DevMem2Di& comps, int l1, int l2, bool& changed)
|
__device__ __forceinline__ void isConnected(PtrStepSzi& comps, int l1, int l2, bool& changed)
|
||||||
{
|
{
|
||||||
int r1 = root(comps, l1);
|
int r1 = root(comps, l1);
|
||||||
int r2 = root(comps, l2);
|
int r2 = root(comps, l2);
|
||||||
@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
__global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX,
|
__global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX,
|
||||||
const DevMem2D edges, DevMem2Di comps, const int yIncomplete, int xIncomplete)
|
const PtrStepSzb edges, PtrStepSzi comps, const int yIncomplete, int xIncomplete)
|
||||||
{
|
{
|
||||||
int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
int stride = blockDim.y * blockDim.x;
|
int stride = blockDim.y * blockDim.x;
|
||||||
@ -482,7 +482,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
} while (Emulation::syncthreadsOr(changed));
|
} while (Emulation::syncthreadsOr(changed));
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void flatten(const DevMem2D edges, DevMem2Di comps)
|
__global__ void flatten(const PtrStepSzb edges, PtrStepSzi comps)
|
||||||
{
|
{
|
||||||
int x = threadIdx.x + blockIdx.x * blockDim.x;
|
int x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
int y = threadIdx.y + blockIdx.y * blockDim.y;
|
int y = threadIdx.y + blockIdx.y * blockDim.y;
|
||||||
@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1};
|
enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1};
|
||||||
|
|
||||||
void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream)
|
void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
|
dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
|
||||||
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
|
dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS));
|
||||||
|
@ -222,12 +222,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
|
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
|
||||||
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream) \
|
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) \
|
||||||
{ \
|
{ \
|
||||||
traits::functor_type functor = traits::create_functor(); \
|
traits::functor_type functor = traits::create_functor(); \
|
||||||
typedef typename traits::functor_type::argument_type src_t; \
|
typedef typename traits::functor_type::argument_type src_t; \
|
||||||
typedef typename traits::functor_type::result_type dst_t; \
|
typedef typename traits::functor_type::result_type dst_t; \
|
||||||
cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, WithOutMask(), stream); \
|
cv::gpu::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
||||||
|
@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int KSIZE, typename T, typename D, typename B>
|
template <int KSIZE, typename T, typename D, typename B>
|
||||||
__global__ void linearColumnFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
__global__ void linearColumnFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||||
const int BLOCK_DIM_X = 16;
|
const int BLOCK_DIM_X = 16;
|
||||||
@ -125,7 +125,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int KSIZE, typename T, typename D, template<typename> class B>
|
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||||
void linearColumnFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream)
|
void linearColumnFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int BLOCK_DIM_X;
|
int BLOCK_DIM_X;
|
||||||
int BLOCK_DIM_Y;
|
int BLOCK_DIM_Y;
|
||||||
@ -158,9 +158,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[5][33] =
|
static const caller_t callers[5][33] =
|
||||||
{
|
{
|
||||||
@ -343,13 +343,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
|
|
||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
|
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void linearColumnFilter_gpu<float , uchar >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , uchar >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float4, uchar4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float4, uchar4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float3, short3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float3, short3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float , int >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , int >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
} // namespace column_filter
|
} // namespace column_filter
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -47,7 +47,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
|
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, PtrStepSz<T> dst, int top, int left)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
|
template <template <typename> class B, typename T> struct CopyMakeBorderDispatcher
|
||||||
{
|
{
|
||||||
static void call(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, int top, int left,
|
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, int top, int left,
|
||||||
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
|
const typename VecTraits<T>::elem_type* borderValue, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
@ -75,12 +75,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode,
|
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode,
|
||||||
const T* borderValue, cudaStream_t stream)
|
const T* borderValue, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, cn>::vec_type vec_type;
|
typedef typename TypeVec<T, cn>::vec_type vec_type;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2D_<vec_type>& src, const DevMem2D_<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSz<vec_type>& src, const PtrStepSz<vec_type>& dst, int top, int left, const T* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[5] =
|
static const caller_t callers[5] =
|
||||||
{
|
{
|
||||||
@ -91,37 +91,37 @@ namespace cv { namespace gpu { namespace device
|
|||||||
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
|
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[borderMode](DevMem2D_<vec_type>(src), DevMem2D_<vec_type>(dst), top, left, borderValue, stream);
|
callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void copyMakeBorder_gpu<uchar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<uchar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<uchar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<uchar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<uchar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<uchar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<uchar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<uchar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const uchar* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
//template void copyMakeBorder_gpu<schar, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<schar, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<schar, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<schar, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<schar, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<schar, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<schar, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<schar, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const schar* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
template void copyMakeBorder_gpu<ushort, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<ushort, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<ushort, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<ushort, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<ushort, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<ushort, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<ushort, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<ushort, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const ushort* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
template void copyMakeBorder_gpu<short, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<short, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<short, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<short, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<short, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<short, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<short, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<short, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const short* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
//template void copyMakeBorder_gpu<int, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<int, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<int, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<int, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<int, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<int, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<int, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<int, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const int* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
template void copyMakeBorder_gpu<float, 1>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<float, 1>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
//template void copyMakeBorder_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<float, 2>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<float, 3>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<float, 4>(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -49,7 +49,7 @@ namespace cv { namespace gpu {
|
|||||||
namespace device
|
namespace device
|
||||||
{
|
{
|
||||||
template <typename D>
|
template <typename D>
|
||||||
__global__ void Bayer2BGR_8u(const PtrStepb src, DevMem2D_<D> dst, const bool blue_last, const bool start_with_green)
|
__global__ void Bayer2BGR_8u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
|
||||||
{
|
{
|
||||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -193,7 +193,7 @@ namespace cv { namespace gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename D>
|
template <typename D>
|
||||||
__global__ void Bayer2BGR_16u(const PtrStepb src, DevMem2D_<D> dst, const bool blue_last, const bool start_with_green)
|
__global__ void Bayer2BGR_16u(const PtrStepb src, PtrStepSz<D> dst, const bool blue_last, const bool start_with_green)
|
||||||
{
|
{
|
||||||
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -287,7 +287,7 @@ namespace cv { namespace gpu {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void Bayer2BGR_8u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
||||||
|
|
||||||
@ -296,14 +296,14 @@ namespace cv { namespace gpu {
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (DevMem2D_<dst_t>)dst, blue_last, start_with_green);
|
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void Bayer2BGR_16u_gpu(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
|
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
|
||||||
|
|
||||||
@ -312,16 +312,16 @@ namespace cv { namespace gpu {
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (DevMem2D_<dst_t>)dst, blue_last, start_with_green);
|
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void Bayer2BGR_8u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
template void Bayer2BGR_8u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
template void Bayer2BGR_16u_gpu<3>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
template void Bayer2BGR_16u_gpu<4>(DevMem2Db src, DevMem2Db dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool calcScore, class Mask>
|
template <bool calcScore, class Mask>
|
||||||
__global__ void calcKeypoints(const DevMem2Db img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
|
__global__ void calcKeypoints(const PtrStepSzb img, const Mask mask, short2* kpLoc, const unsigned int maxKeypoints, PtrStepi score, const int threshold)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
||||||
|
|
||||||
@ -282,7 +282,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold)
|
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
||||||
@ -323,7 +323,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// nonmaxSupression
|
// nonmaxSupression
|
||||||
|
|
||||||
__global__ void nonmaxSupression(const short2* kpLoc, int count, const DevMem2Di scoreMat, short2* locFinal, float* responseFinal)
|
__global__ void nonmaxSupression(const short2* kpLoc, int count, const PtrStepSzi scoreMat, short2* locFinal, float* responseFinal)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 110)
|
||||||
|
|
||||||
@ -359,7 +359,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response)
|
int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
||||||
|
@ -81,7 +81,7 @@ namespace bgfg
|
|||||||
|
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
__global__ void calcPartialHistogram(const DevMem2D_<PT> prevFrame, const PtrStep_<CT> curFrame, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2)
|
__global__ void calcPartialHistogram(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2)
|
||||||
{
|
{
|
||||||
#if (__CUDA_ARCH__ < 200)
|
#if (__CUDA_ARCH__ < 200)
|
||||||
const int HISTOGRAM_WARP_COUNT = 4;
|
const int HISTOGRAM_WARP_COUNT = 4;
|
||||||
@ -240,7 +240,7 @@ namespace bgfg
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
void calcDiffHistogram_gpu(DevMem2Db prevFrame, DevMem2Db curFrame,
|
void calcDiffHistogram_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame,
|
||||||
unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
|
unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
|
||||||
unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
|
unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
|
||||||
int cc, cudaStream_t stream)
|
int cc, cudaStream_t stream)
|
||||||
@ -249,7 +249,7 @@ namespace bgfg
|
|||||||
const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;
|
const int HISTOGRAM_THREADBLOCK_SIZE = HISTOGRAM_WARP_COUNT * WARP_SIZE;
|
||||||
|
|
||||||
calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
|
calcPartialHistogram<PT, CT><<<PARTIAL_HISTOGRAM_COUNT, HISTOGRAM_THREADBLOCK_SIZE, 0, stream>>>(
|
||||||
(DevMem2D_<PT>)prevFrame, (DevMem2D_<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
|
(PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, partialBuf0, partialBuf1, partialBuf2);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
|
mergeHistogram<<<HISTOGRAM_BIN_COUNT, MERGE_THREADBLOCK_SIZE, 0, stream>>>(partialBuf0, partialBuf1, partialBuf2, hist0, hist1, hist2);
|
||||||
@ -259,16 +259,16 @@ namespace bgfg
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void calcDiffHistogram_gpu<uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
template void calcDiffHistogram_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
||||||
template void calcDiffHistogram_gpu<uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
template void calcDiffHistogram_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
||||||
template void calcDiffHistogram_gpu<uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
template void calcDiffHistogram_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
||||||
template void calcDiffHistogram_gpu<uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
template void calcDiffHistogram_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
// calcDiffThreshMask
|
// calcDiffThreshMask
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
__global__ void calcDiffThreshMask(const DevMem2D_<PT> prevFrame, const PtrStep_<CT> curFrame, uchar3 bestThres, PtrStepb changeMask)
|
__global__ void calcDiffThreshMask(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame, uchar3 bestThres, PtrStepb changeMask)
|
||||||
{
|
{
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -290,22 +290,22 @@ namespace bgfg
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
void calcDiffThreshMask_gpu(DevMem2Db prevFrame, DevMem2Db curFrame, uchar3 bestThres, DevMem2Db changeMask, cudaStream_t stream)
|
void calcDiffThreshMask_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
|
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
|
||||||
|
|
||||||
calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((DevMem2D_<PT>)prevFrame, (DevMem2D_<CT>)curFrame, bestThres, changeMask);
|
calcDiffThreshMask<PT, CT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame, bestThres, changeMask);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void calcDiffThreshMask_gpu<uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, uchar3 bestThres, DevMem2Db changeMask, cudaStream_t stream);
|
template void calcDiffThreshMask_gpu<uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
template void calcDiffThreshMask_gpu<uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, uchar3 bestThres, DevMem2Db changeMask, cudaStream_t stream);
|
template void calcDiffThreshMask_gpu<uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
template void calcDiffThreshMask_gpu<uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, uchar3 bestThres, DevMem2Db changeMask, cudaStream_t stream);
|
template void calcDiffThreshMask_gpu<uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
template void calcDiffThreshMask_gpu<uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, uchar3 bestThres, DevMem2Db changeMask, cudaStream_t stream);
|
template void calcDiffThreshMask_gpu<uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, uchar3 bestThres, PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
// bgfgClassification
|
// bgfgClassification
|
||||||
@ -334,7 +334,7 @@ namespace bgfg
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
__global__ void bgfgClassification(const DevMem2D_<PT> prevFrame, const PtrStep_<CT> curFrame,
|
__global__ void bgfgClassification(const PtrStepSz<PT> prevFrame, const PtrStep<CT> curFrame,
|
||||||
const PtrStepb Ftd, const PtrStepb Fbd, PtrStepb foreground,
|
const PtrStepb Ftd, const PtrStepb Fbd, PtrStepb foreground,
|
||||||
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc)
|
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc)
|
||||||
{
|
{
|
||||||
@ -413,7 +413,7 @@ namespace bgfg
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
void bgfgClassification_gpu(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground,
|
void bgfgClassification_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
|
||||||
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream)
|
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
@ -421,7 +421,7 @@ namespace bgfg
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(bgfgClassification<PT, CT, OT>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((DevMem2D_<PT>)prevFrame, (DevMem2D_<CT>)curFrame,
|
bgfgClassification<PT, CT, OT><<<grid, block, 0, stream>>>((PtrStepSz<PT>)prevFrame, (PtrStepSz<CT>)curFrame,
|
||||||
Ftd, Fbd, foreground,
|
Ftd, Fbd, foreground,
|
||||||
deltaC, deltaCC, alpha2, N1c, N1cc);
|
deltaC, deltaCC, alpha2, N1c, N1cc);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
@ -430,21 +430,21 @@ namespace bgfg
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar3, uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar3, uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar3, uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar4, uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar4, uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar4, uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
template void bgfgClassification_gpu<uchar4, uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
template void bgfgClassification_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// updateBackgroundModel
|
// updateBackgroundModel
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT, class PrevFramePtr2D, class CurFramePtr2D, class FtdPtr2D, class FbdPtr2D>
|
template <typename PT, typename CT, typename OT, class PrevFramePtr2D, class CurFramePtr2D, class FtdPtr2D, class FbdPtr2D>
|
||||||
__global__ void updateBackgroundModel(int cols, int rows, const PrevFramePtr2D prevFrame, const CurFramePtr2D curFrame, const FtdPtr2D Ftd, const FbdPtr2D Fbd,
|
__global__ void updateBackgroundModel(int cols, int rows, const PrevFramePtr2D prevFrame, const CurFramePtr2D curFrame, const FtdPtr2D Ftd, const FbdPtr2D Fbd,
|
||||||
PtrStepb foreground, PtrStep_<OT> background,
|
PtrStepb foreground, PtrStep<OT> background,
|
||||||
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T)
|
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T)
|
||||||
{
|
{
|
||||||
const int i = blockIdx.y * blockDim.y + threadIdx.y;
|
const int i = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -803,16 +803,16 @@ namespace bgfg
|
|||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
struct UpdateBackgroundModel
|
struct UpdateBackgroundModel
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<PT> prevFrame, DevMem2D_<CT> curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2D_<OT> background,
|
static void call(PtrStepSz<PT> prevFrame, PtrStepSz<CT> curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSz<OT> background,
|
||||||
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
|
dim3 grid(divUp(prevFrame.cols, block.x), divUp(prevFrame.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep_<PT>, PtrStep_<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb>, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
updateBackgroundModel<PT, CT, OT, PtrStep_<PT>, PtrStep_<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>(
|
updateBackgroundModel<PT, CT, OT, PtrStep<PT>, PtrStep<CT>, PtrStepb, PtrStepb><<<grid, block, 0, stream>>>(
|
||||||
prevFrame.cols, prevFrame.rows,
|
prevFrame.cols, prevFrame.rows,
|
||||||
prevFrame, curFrame,
|
prevFrame, curFrame,
|
||||||
Ftd, Fbd, foreground, background,
|
Ftd, Fbd, foreground, background,
|
||||||
@ -825,20 +825,20 @@ namespace bgfg
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
void updateBackgroundModel_gpu(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background,
|
void updateBackgroundModel_gpu(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background,
|
||||||
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
UpdateBackgroundModel<PT, CT, OT>::call(DevMem2D_<PT>(prevFrame), DevMem2D_<CT>(curFrame), Ftd, Fbd, foreground, DevMem2D_<OT>(background),
|
UpdateBackgroundModel<PT, CT, OT>::call(PtrStepSz<PT>(prevFrame), PtrStepSz<CT>(curFrame), Ftd, Fbd, foreground, PtrStepSz<OT>(background),
|
||||||
deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T, stream);
|
deltaC, deltaCC, alpha1, alpha2, alpha3, N1c, N1cc, N2c, N2cc, T, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar3, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar3, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar4, uchar3, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar3>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar3>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar4>(DevMem2Db prevFrame, DevMem2Db curFrame, DevMem2Db Ftd, DevMem2Db Fbd, DevMem2Db foreground, DevMem2Db background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
template void updateBackgroundModel_gpu<uchar4, uchar4, uchar4>(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground, PtrStepSzb background, int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#ifndef __FGD_BGFG_COMMON_HPP__
|
#ifndef __FGD_BGFG_COMMON_HPP__
|
||||||
#define __FGD_BGFG_COMMON_HPP__
|
#define __FGD_BGFG_COMMON_HPP__
|
||||||
|
|
||||||
#include "opencv2/core/devmem2d.hpp"
|
#include "opencv2/core/cuda_devptrs.hpp"
|
||||||
|
|
||||||
namespace bgfg
|
namespace bgfg
|
||||||
{
|
{
|
||||||
@ -122,24 +122,24 @@ namespace bgfg
|
|||||||
const int HISTOGRAM_BIN_COUNT = 256;
|
const int HISTOGRAM_BIN_COUNT = 256;
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
void calcDiffHistogram_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
|
void calcDiffHistogram_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
|
||||||
unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
|
unsigned int* hist0, unsigned int* hist1, unsigned int* hist2,
|
||||||
unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
|
unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename PT, typename CT>
|
template <typename PT, typename CT>
|
||||||
void calcDiffThreshMask_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, uchar3 bestThres, cv::gpu::DevMem2Db changeMask, cudaStream_t stream);
|
void calcDiffThreshMask_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
|
|
||||||
void setBGPixelStat(const BGPixelStat& stat);
|
void setBGPixelStat(const BGPixelStat& stat);
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
void bgfgClassification_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
|
void bgfgClassification_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
|
||||||
cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd, cv::gpu::DevMem2Db foreground,
|
cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground,
|
||||||
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename PT, typename CT, typename OT>
|
template <typename PT, typename CT, typename OT>
|
||||||
void updateBackgroundModel_gpu(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame,
|
void updateBackgroundModel_gpu(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame,
|
||||||
cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd, cv::gpu::DevMem2Db foreground, cv::gpu::DevMem2Db background,
|
cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground, cv::gpu::PtrStepSzb background,
|
||||||
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
@ -97,7 +97,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#endif // __CUDA_ARCH__ >= 110
|
#endif // __CUDA_ARCH__ >= 110
|
||||||
}
|
}
|
||||||
|
|
||||||
int findCorners_gpu(DevMem2Df eig, float threshold, DevMem2Db mask, float2* corners, int max_count)
|
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
|
||||||
@ -134,7 +134,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void sortCorners_gpu(DevMem2Df eig, float2* corners, int count)
|
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
|
||||||
{
|
{
|
||||||
bindTexture(&eigTex, eig);
|
bindTexture(&eigTex, eig);
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask)
|
|||||||
|
|
||||||
__global__ void calcWobbleSuppressionMapsKernel(
|
__global__ void calcWobbleSuppressionMapsKernel(
|
||||||
const int left, const int idx, const int right, const int width, const int height,
|
const int left, const int idx, const int right, const int width, const int height,
|
||||||
PtrElemStepf mapx, PtrElemStepf mapy)
|
PtrStepf mapx, PtrStepf mapy)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -97,7 +97,7 @@ __global__ void calcWobbleSuppressionMapsKernel(
|
|||||||
|
|
||||||
void calcWobbleSuppressionMaps(
|
void calcWobbleSuppressionMaps(
|
||||||
int left, int idx, int right, int width, int height,
|
int left, int idx, int right, int width, int height,
|
||||||
const float *ml, const float *mr, DevMem2Df mapx, DevMem2Df mapy)
|
const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy)
|
||||||
{
|
{
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
|
cudaSafeCall(cudaMemcpyToSymbol(cml, ml, 9*sizeof(float)));
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
|
cudaSafeCall(cudaMemcpyToSymbol(cmr, mr, 9*sizeof(float)));
|
||||||
|
@ -169,10 +169,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
|
d_Histogram[blockIdx.x] = saturate_cast<int>(data[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void histogram256_gpu(DevMem2Db src, int* hist, uint* buf, cudaStream_t stream)
|
void histogram256_gpu(PtrStepSzb src, int* hist, uint* buf, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
|
histogram256<<<PARTIAL_HISTOGRAM256_COUNT, HISTOGRAM256_THREADBLOCK_SIZE, 0, stream>>>(
|
||||||
DevMem2D_<uint>(src),
|
PtrStepSz<uint>(src),
|
||||||
buf,
|
buf,
|
||||||
static_cast<uint>(src.rows * src.step / sizeof(uint)),
|
static_cast<uint>(src.rows * src.step / sizeof(uint)),
|
||||||
src.cols);
|
src.cols);
|
||||||
@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
__constant__ int c_lut[256];
|
__constant__ int c_lut[256];
|
||||||
|
|
||||||
__global__ void equalizeHist(const DevMem2Db src, PtrStepb dst)
|
__global__ void equalizeHist(const PtrStepSzb src, PtrStepb dst)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream)
|
void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(16, 16);
|
dim3 block(16, 16);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
|
@ -110,8 +110,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int nblocks> // Number of histogram blocks processed by single GPU thread block
|
template <int nblocks> // Number of histogram blocks processed by single GPU thread block
|
||||||
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrElemStepf grad,
|
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrStepf grad,
|
||||||
const PtrElemStep qangle, float scale, float* block_hists)
|
const PtrStepb qangle, float scale, float* block_hists)
|
||||||
{
|
{
|
||||||
const int block_x = threadIdx.z;
|
const int block_x = threadIdx.z;
|
||||||
const int cell_x = threadIdx.x / 16;
|
const int cell_x = threadIdx.x / 16;
|
||||||
@ -149,8 +149,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
float2 vote = *(const float2*)grad_ptr;
|
float2 vote = *(const float2*)grad_ptr;
|
||||||
uchar2 bin = *(const uchar2*)qangle_ptr;
|
uchar2 bin = *(const uchar2*)qangle_ptr;
|
||||||
|
|
||||||
grad_ptr += grad.step;
|
grad_ptr += grad.step/grad.elemSize();
|
||||||
qangle_ptr += qangle.step;
|
qangle_ptr += qangle.step/qangle.elemSize();
|
||||||
|
|
||||||
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
|
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
|
||||||
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
|
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
|
||||||
@ -188,8 +188,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||||
int height, int width, const DevMem2Df& grad,
|
int height, int width, const PtrStepSzf& grad,
|
||||||
const DevMem2Db& qangle, float sigma, float* block_hists)
|
const PtrStepSzb& qangle, float sigma, float* block_hists)
|
||||||
{
|
{
|
||||||
const int nblocks = 1;
|
const int nblocks = 1;
|
||||||
|
|
||||||
@ -512,7 +512,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <int nthreads>
|
template <int nthreads>
|
||||||
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
|
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
|
||||||
const float* block_hists, PtrElemStepf descriptors)
|
const float* block_hists, PtrStepf descriptors)
|
||||||
{
|
{
|
||||||
// Get left top corner of the window in src
|
// Get left top corner of the window in src
|
||||||
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
|
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
|
||||||
@ -532,7 +532,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
|
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
|
||||||
int height, int width, float* block_hists, DevMem2Df descriptors)
|
int height, int width, float* block_hists, PtrStepSzf descriptors)
|
||||||
{
|
{
|
||||||
const int nthreads = 256;
|
const int nthreads = 256;
|
||||||
|
|
||||||
@ -555,7 +555,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <int nthreads>
|
template <int nthreads>
|
||||||
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
|
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
|
||||||
const int win_block_stride_y, const float* block_hists,
|
const int win_block_stride_y, const float* block_hists,
|
||||||
PtrElemStepf descriptors)
|
PtrStepf descriptors)
|
||||||
{
|
{
|
||||||
// Get left top corner of the window in src
|
// Get left top corner of the window in src
|
||||||
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
|
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
|
||||||
@ -581,7 +581,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||||
DevMem2Df descriptors)
|
PtrStepSzf descriptors)
|
||||||
{
|
{
|
||||||
const int nthreads = 256;
|
const int nthreads = 256;
|
||||||
|
|
||||||
@ -605,8 +605,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int nthreads, int correct_gamma>
|
template <int nthreads, int correct_gamma>
|
||||||
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrElemStep img,
|
__global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrStepb img,
|
||||||
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
|
float angle_scale, PtrStepf grad, PtrStepb qangle)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
@ -707,8 +707,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2Db& img,
|
void compute_gradients_8UC4(int nbins, int height, int width, const PtrStepSzb& img,
|
||||||
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
|
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
|
||||||
{
|
{
|
||||||
(void)nbins;
|
(void)nbins;
|
||||||
const int nthreads = 256;
|
const int nthreads = 256;
|
||||||
@ -727,8 +727,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int nthreads, int correct_gamma>
|
template <int nthreads, int correct_gamma>
|
||||||
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img,
|
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrStepb img,
|
||||||
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
|
float angle_scale, PtrStepf grad, PtrStepb qangle)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
@ -780,8 +780,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2Db& img,
|
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
|
||||||
float angle_scale, DevMem2Df grad, DevMem2Db qangle, bool correct_gamma)
|
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
|
||||||
{
|
{
|
||||||
(void)nbins;
|
(void)nbins;
|
||||||
const int nthreads = 256;
|
const int nthreads = 256;
|
||||||
@ -807,7 +807,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
|
texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
|
||||||
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
|
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
|
||||||
|
|
||||||
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar> dst, int colOfs)
|
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar> dst, int colOfs)
|
||||||
{
|
{
|
||||||
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -816,7 +816,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
|
dst.ptr(y)[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar4> dst, int colOfs)
|
__global__ void resize_for_hog_kernel(float sx, float sy, PtrStepSz<uchar4> dst, int colOfs)
|
||||||
{
|
{
|
||||||
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -829,7 +829,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class T, class TEX>
|
template<class T, class TEX>
|
||||||
static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
|
static void resize_for_hog(const PtrStepSzb& src, PtrStepSzb dst, TEX& tex)
|
||||||
{
|
{
|
||||||
tex.filterMode = cudaFilterModeLinear;
|
tex.filterMode = cudaFilterModeLinear;
|
||||||
|
|
||||||
@ -852,7 +852,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
float sx = static_cast<float>(src.cols) / dst.cols;
|
float sx = static_cast<float>(src.cols) / dst.cols;
|
||||||
float sy = static_cast<float>(src.rows) / dst.rows;
|
float sy = static_cast<float>(src.rows) / dst.rows;
|
||||||
|
|
||||||
resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (DevMem2D_<T>)dst, colOfs);
|
resize_for_hog_kernel<<<grid, threads>>>(sx, sy, (PtrStepSz<T>)dst, colOfs);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
@ -860,7 +860,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaUnbindTexture(tex) );
|
cudaSafeCall( cudaUnbindTexture(tex) );
|
||||||
}
|
}
|
||||||
|
|
||||||
void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
|
void resize_8UC1(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
|
||||||
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
|
void resize_8UC4(const PtrStepSzb& src, PtrStepSzb dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
|
||||||
} // namespace hog
|
} // namespace hog
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
const int PIXELS_PER_THREAD = 16;
|
const int PIXELS_PER_THREAD = 16;
|
||||||
|
|
||||||
__global__ void buildPointList(const DevMem2Db src, unsigned int* list)
|
__global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
|
||||||
{
|
{
|
||||||
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
|
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
|
||||||
__shared__ int s_qsize[4];
|
__shared__ int s_qsize[4];
|
||||||
@ -112,7 +112,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
list[gidx] = s_queues[threadIdx.y][i];
|
list[gidx] = s_queues[threadIdx.y][i];
|
||||||
}
|
}
|
||||||
|
|
||||||
int buildPointList_gpu(DevMem2Db src, unsigned int* list)
|
int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
|
||||||
{
|
{
|
||||||
void* counterPtr;
|
void* counterPtr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||||
@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
accumRow[i] = smem[i];
|
accumRow[i] = smem[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
|
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
|
||||||
{
|
{
|
||||||
const dim3 block(has20 ? 1024 : 512);
|
const dim3 block(has20 ? 1024 : 512);
|
||||||
const dim3 grid(accum.rows - 2);
|
const dim3 grid(accum.rows - 2);
|
||||||
@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// linesGetResult
|
// linesGetResult
|
||||||
|
|
||||||
__global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
|
__global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
|
||||||
{
|
{
|
||||||
const int r = blockIdx.x * blockDim.x + threadIdx.x;
|
const int r = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int n = blockIdx.y * blockDim.y + threadIdx.y;
|
const int n = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -254,7 +254,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
|
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
|
||||||
{
|
{
|
||||||
void* counterPtr;
|
void* counterPtr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||||
@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, DevMem2Di accum, int minRadius, int maxRadius, float idp)
|
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
|
||||||
{
|
{
|
||||||
const dim3 block(256);
|
const dim3 block(256);
|
||||||
const dim3 grid(divUp(count, block.x));
|
const dim3 grid(divUp(count, block.x));
|
||||||
@ -357,7 +357,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// buildCentersList
|
// buildCentersList
|
||||||
|
|
||||||
__global__ void buildCentersList(const DevMem2Di accum, unsigned int* centers, const int threshold)
|
__global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -381,7 +381,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int buildCentersList_gpu(DevMem2Di accum, unsigned int* centers, int threshold)
|
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
|
||||||
{
|
{
|
||||||
void* counterPtr;
|
void* counterPtr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
|
||||||
@ -467,7 +467,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const dim3 block(has20 ? 1024 : 512);
|
const dim3 block(has20 ? 1024 : 512);
|
||||||
const dim3 grid(centersCount);
|
const dim3 grid(centersCount);
|
||||||
|
|
||||||
const int histSize = ::ceil(maxRadius - minRadius + 1);
|
const int histSize = maxRadius - minRadius + 1;
|
||||||
size_t smemSize = (histSize + 2) * sizeof(int);
|
size_t smemSize = (histSize + 2) * sizeof(int);
|
||||||
|
|
||||||
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
|
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
|
||||||
|
@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
@ -162,7 +162,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
//cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
@ -284,7 +284,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16, 1);
|
dim3 threads(16, 16, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -298,7 +298,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -317,7 +317,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
__constant__ float cq[16];
|
__constant__ float cq[16];
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
__global__ void reprojectImageTo3D(const DevMem2D_<T> disp, PtrStep<D> xyz)
|
__global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -343,31 +343,31 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void reprojectImageTo3D_gpu(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream)
|
void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));
|
dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) );
|
||||||
|
|
||||||
reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((DevMem2D_<T>)disp, (DevMem2D_<D>)xyz);
|
reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void reprojectImageTo3D_gpu<uchar, float3>(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
template void reprojectImageTo3D_gpu<uchar, float4>(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
template void reprojectImageTo3D_gpu<short, float3>(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
template void reprojectImageTo3D_gpu<short, float4>(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
|
|
||||||
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
|
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
|
||||||
|
|
||||||
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
|
|
||||||
__global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst)
|
__global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -401,7 +401,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename BR, typename BC>
|
template <typename BR, typename BC>
|
||||||
__global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst, const BR border_row, const BC border_col)
|
__global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst, const BR border_row, const BC border_col)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -438,7 +438,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cornerHarris_gpu(int block_size, float k, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
|
void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
|
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
|
||||||
@ -472,7 +472,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
|
|
||||||
__global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst)
|
__global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -510,7 +510,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename BR, typename BC>
|
template <typename BR, typename BC>
|
||||||
__global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst, const BR border_row, const BC border_col)
|
__global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst, const BR border_row, const BC border_col)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -550,7 +550,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cornerMinEigenVal_gpu(int block_size, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
|
void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
|
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
|
||||||
@ -602,7 +602,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void columnSum_32F(const DevMem2Db src, const DevMem2Db dst)
|
void columnSum_32F(const PtrStepSzb src, const PtrStepSzb dst)
|
||||||
{
|
{
|
||||||
dim3 threads(256);
|
dim3 threads(256);
|
||||||
dim3 grid(divUp(src.cols, threads.x));
|
dim3 grid(divUp(src.cols, threads.x));
|
||||||
@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// mulSpectrums
|
// mulSpectrums
|
||||||
|
|
||||||
__global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
|
__global__ void mulSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -629,7 +629,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream)
|
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(256);
|
dim3 threads(256);
|
||||||
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
||||||
@ -645,7 +645,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// mulSpectrums_CONJ
|
// mulSpectrums_CONJ
|
||||||
|
|
||||||
__global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c)
|
__global__ void mulSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -657,7 +657,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream)
|
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(256);
|
dim3 threads(256);
|
||||||
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
||||||
@ -673,7 +673,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// mulAndScaleSpectrums
|
// mulAndScaleSpectrums
|
||||||
|
|
||||||
__global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c)
|
__global__ void mulAndScaleSpectrumsKernel(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -686,7 +686,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream)
|
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(256);
|
dim3 threads(256);
|
||||||
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
||||||
@ -702,7 +702,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// mulAndScaleSpectrums_CONJ
|
// mulAndScaleSpectrums_CONJ
|
||||||
|
|
||||||
__global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c)
|
__global__ void mulAndScaleSpectrumsKernel_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -715,7 +715,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream)
|
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(256);
|
dim3 threads(256);
|
||||||
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
dim3 grid(divUp(c.cols, threads.x), divUp(c.rows, threads.y));
|
||||||
@ -830,7 +830,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], const float t[3],
|
const float k_rinv[9], const float r_kinv[9], const float t[3],
|
||||||
float scale, cudaStream_t stream)
|
float scale, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@ -852,7 +852,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@ -873,7 +873,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@ -901,7 +901,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
__constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
|
__constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
|
||||||
|
|
||||||
template <class SrcT, typename D>
|
template <class SrcT, typename D>
|
||||||
__global__ void filter2D(const SrcT src, DevMem2D_<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
|
__global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
|
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
|
||||||
|
|
||||||
@ -941,7 +941,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
|
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
|
||||||
{ \
|
{ \
|
||||||
static void call(const DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2D_<D> dst, \
|
static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
|
||||||
{ \
|
{ \
|
||||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||||
@ -970,11 +970,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#undef IMPLEMENT_FILTER2D_TEX_READER
|
#undef IMPLEMENT_FILTER2D_TEX_READER
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
|
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream)
|
int borderMode, const float* borderValue, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
Filter2DCaller<T, D, BrdReflect101>::call,
|
Filter2DCaller<T, D, BrdReflect101>::call,
|
||||||
@ -986,14 +986,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
cudaSafeCall(cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
||||||
|
|
||||||
funcs[borderMode](static_cast< DevMem2D_<T> >(srcWhole), ofsX, ofsY, static_cast< DevMem2D_<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
|
funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void filter2D_gpu<uchar, uchar>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void filter2D_gpu<uchar4, uchar4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void filter2D_gpu<ushort, ushort>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void filter2D_gpu<ushort4, ushort4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void filter2D_gpu<float, float>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void filter2D_gpu<float4, float4>(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device {
|
}}} // namespace cv { namespace gpu { namespace device {
|
||||||
|
@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void shfl_integral_horizontal(const PtrStep_<uint4> img, PtrStep_<uint4> integral)
|
__global__ void shfl_integral_horizontal(const PtrStep<uint4> img, PtrStep<uint4> integral)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
||||||
__shared__ int sums[128];
|
__shared__ int sums[128];
|
||||||
@ -297,7 +297,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// The final set of sums from the block is then propagated, with the block
|
// The final set of sums from the block is then propagated, with the block
|
||||||
// computing "down" the image and adding the running sum to the local
|
// computing "down" the image and adding the running sum to the local
|
||||||
// block sums.
|
// block sums.
|
||||||
__global__ void shfl_integral_vertical(DevMem2D_<unsigned int> integral)
|
__global__ void shfl_integral_vertical(PtrStepSz<unsigned int> integral)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
|
||||||
__shared__ unsigned int sums[32][9];
|
__shared__ unsigned int sums[32][9];
|
||||||
@ -355,7 +355,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream)
|
void shfl_integral_gpu(PtrStepSzb img, PtrStepSz<unsigned int> integral, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
// each thread handles 16 values, use 1 block/row
|
// each thread handles 16 values, use 1 block/row
|
||||||
@ -366,7 +366,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
|
cudaSafeCall( cudaFuncSetCacheConfig(shfl_integral_horizontal, cudaFuncCachePreferL1) );
|
||||||
|
|
||||||
shfl_integral_horizontal<<<grid, block, 0, stream>>>((DevMem2D_<uint4>) img, (DevMem2D_<uint4>) integral);
|
shfl_integral_horizontal<<<grid, block, 0, stream>>>((PtrStepSz<uint4>) img, (PtrStepSz<uint4>) integral);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
|
void connectedConmonents(PtrStepSz<int4> candidates, int ncandidates, PtrStepSz<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
|
||||||
{
|
{
|
||||||
int block = ncandidates;
|
int block = ncandidates;
|
||||||
int smem = block * ( sizeof(int) + sizeof(int4) );
|
int smem = block * ( sizeof(int) + sizeof(int4) );
|
||||||
@ -240,7 +240,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
// stepShift, scale, width_k, sum_prev => y = sum_prev + tid_k / width_k, x = tid_k - tid_k / width_k
|
// stepShift, scale, width_k, sum_prev => y = sum_prev + tid_k / width_k, x = tid_k - tid_k / width_k
|
||||||
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
|
__global__ void lbp_cascade(const Cascade cascade, int frameW, int frameH, int windowW, int windowH, float scale, const float factor,
|
||||||
const int total, int* integral, const int pitch, DevMem2D_<int4> objects, unsigned int* classified)
|
const int total, int* integral, const int pitch, PtrStepSz<int4> objects, unsigned int* classified)
|
||||||
{
|
{
|
||||||
int ftid = blockIdx.x * blockDim.x + threadIdx.x;
|
int ftid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if (ftid >= total) return;
|
if (ftid >= total) return;
|
||||||
@ -285,8 +285,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initialScale, float factor, int workAmount,
|
void classifyPyramid(int frameW, int frameH, int windowW, int windowH, float initialScale, float factor, int workAmount,
|
||||||
const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets, const DevMem2Db& mfeatures,
|
const PtrStepSzb& mstages, const int nstages, const PtrStepSzi& mnodes, const PtrStepSzf& mleaves, const PtrStepSzi& msubsets, const PtrStepSzb& mfeatures,
|
||||||
const int subsetSize, DevMem2D_<int4> objects, unsigned int* classified, DevMem2Di integral)
|
const int subsetSize, PtrStepSz<int4> objects, unsigned int* classified, PtrStepSzi integral)
|
||||||
{
|
{
|
||||||
const int block = 128;
|
const int block = 128;
|
||||||
int grid = divUp(workAmount, block);
|
int grid = divUp(workAmount, block);
|
||||||
|
@ -81,7 +81,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Naive_CCORR
|
// Naive_CCORR
|
||||||
|
|
||||||
template <typename T, int cn>
|
template <typename T, int cn>
|
||||||
__global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
|
__global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, cn>::vec_type Type;
|
typedef typename TypeVec<T, cn>::vec_type Type;
|
||||||
typedef typename TypeVec<float, cn>::vec_type Typef;
|
typedef typename TypeVec<float, cn>::vec_type Typef;
|
||||||
@ -106,7 +106,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int cn>
|
template <typename T, int cn>
|
||||||
void matchTemplateNaive_CCORR(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream)
|
void matchTemplateNaive_CCORR(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -118,9 +118,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
|
void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -131,9 +131,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
|
void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -147,7 +147,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Naive_SQDIFF
|
// Naive_SQDIFF
|
||||||
|
|
||||||
template <typename T, int cn>
|
template <typename T, int cn>
|
||||||
__global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, DevMem2Df result)
|
__global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, cn>::vec_type Type;
|
typedef typename TypeVec<T, cn>::vec_type Type;
|
||||||
typedef typename TypeVec<float, cn>::vec_type Typef;
|
typedef typename TypeVec<float, cn>::vec_type Typef;
|
||||||
@ -176,7 +176,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, int cn>
|
template <typename T, int cn>
|
||||||
void matchTemplateNaive_SQDIFF(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream)
|
void matchTemplateNaive_SQDIFF(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -188,9 +188,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
|
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -200,9 +200,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
callers[cn](image, templ, result, stream);
|
callers[cn](image, templ, result, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream)
|
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -216,7 +216,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// Prepared_SQDIFF
|
// Prepared_SQDIFF
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result)
|
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -232,7 +232,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream)
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
|
||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -286,7 +286,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
|
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
|
||||||
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
||||||
unsigned long long templ_sqsum, DevMem2Df result)
|
unsigned long long templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -303,8 +303,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 threads(32, 8);
|
const dim3 threads(32, 8);
|
||||||
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -317,10 +317,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, int cn, cudaStream_t stream)
|
PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);
|
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
|
||||||
@ -332,7 +332,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Prepared_CCOFF
|
// Prepared_CCOFF
|
||||||
|
|
||||||
__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, DevMem2Df result)
|
__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -347,7 +347,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream)
|
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -365,7 +365,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
|
int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
|
||||||
const PtrStep<unsigned int> image_sum_r,
|
const PtrStep<unsigned int> image_sum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<unsigned int> image_sum_g,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -386,10 +386,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC2(
|
void matchTemplatePrepared_CCOFF_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
unsigned int templ_sum_r, unsigned int templ_sum_g,
|
unsigned int templ_sum_r, unsigned int templ_sum_g,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -413,7 +413,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const PtrStep<unsigned int> image_sum_r,
|
const PtrStep<unsigned int> image_sum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<unsigned int> image_sum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b,
|
const PtrStep<unsigned int> image_sum_b,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -438,13 +438,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC3(
|
void matchTemplatePrepared_CCOFF_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b,
|
const PtrStepSz<unsigned int> image_sum_b,
|
||||||
unsigned int templ_sum_r,
|
unsigned int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
unsigned int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
unsigned int templ_sum_b,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -473,7 +473,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const PtrStep<unsigned int> image_sum_g,
|
const PtrStep<unsigned int> image_sum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b,
|
const PtrStep<unsigned int> image_sum_b,
|
||||||
const PtrStep<unsigned int> image_sum_a,
|
const PtrStep<unsigned int> image_sum_a,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -502,15 +502,15 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8UC4(
|
void matchTemplatePrepared_CCOFF_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b,
|
const PtrStepSz<unsigned int> image_sum_b,
|
||||||
const DevMem2D_<unsigned int> image_sum_a,
|
const PtrStepSz<unsigned int> image_sum_a,
|
||||||
unsigned int templ_sum_r,
|
unsigned int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
unsigned int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
unsigned int templ_sum_b,
|
||||||
unsigned int templ_sum_a,
|
unsigned int templ_sum_a,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -537,7 +537,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
float templ_sum_scale, float templ_sqsum_scale,
|
float templ_sum_scale, float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum,
|
const PtrStep<unsigned int> image_sum,
|
||||||
const PtrStep<unsigned long long> image_sqsum,
|
const PtrStep<unsigned long long> image_sqsum,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -557,10 +557,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
int w, int h, const DevMem2D_<unsigned int> image_sum,
|
int w, int h, const PtrStepSz<unsigned int> image_sum,
|
||||||
const DevMem2D_<unsigned long long> image_sqsum,
|
const PtrStepSz<unsigned long long> image_sqsum,
|
||||||
unsigned int templ_sum, unsigned long long templ_sqsum,
|
unsigned int templ_sum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -586,7 +586,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
float templ_sqsum_scale,
|
float templ_sqsum_scale,
|
||||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -616,11 +616,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -653,7 +653,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
|
||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -691,13 +691,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
|
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -734,7 +734,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
|
||||||
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
|
||||||
const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
|
const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
|
||||||
DevMem2Df result)
|
PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -778,15 +778,15 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
|
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
||||||
const DevMem2D_<unsigned int> image_sum_a, const DevMem2D_<unsigned long long> image_sqsum_a,
|
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
||||||
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
||||||
DevMem2Df result, cudaStream_t stream)
|
PtrStepSzf result, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -822,7 +822,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void normalizeKernel_8U(
|
__global__ void normalizeKernel_8U(
|
||||||
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
int w, int h, const PtrStep<unsigned long long> image_sqsum,
|
||||||
unsigned long long templ_sqsum, DevMem2Df result)
|
unsigned long long templ_sqsum, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -836,8 +836,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
|
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
|
||||||
unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream)
|
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
@ -868,7 +868,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
// extractFirstChannel
|
// extractFirstChannel
|
||||||
|
|
||||||
template <int cn>
|
template <int cn>
|
||||||
__global__ void extractFirstChannel_32F(const PtrStepb image, DevMem2Df result)
|
__global__ void extractFirstChannel_32F(const PtrStepb image, PtrStepSzf result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, cn>::vec_type Typef;
|
typedef typename TypeVec<float, cn>::vec_type Typef;
|
||||||
|
|
||||||
@ -882,7 +882,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream)
|
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8);
|
dim3 threads(32, 8);
|
||||||
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
|
||||||
|
@ -130,7 +130,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Mag, typename Angle>
|
template <typename Mag, typename Angle>
|
||||||
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
|
void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -149,9 +149,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
|
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
|
||||||
static const caller_t callers[2][2][2] =
|
static const caller_t callers[2][2][2] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
@ -180,7 +180,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Mag>
|
template <typename Mag>
|
||||||
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
|
void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -198,9 +198,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
|
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
|
||||||
static const caller_t callers[2] =
|
static const caller_t callers[2] =
|
||||||
{
|
{
|
||||||
polarToCart_caller<NonEmptyMag>,
|
polarToCart_caller<NonEmptyMag>,
|
||||||
|
@ -175,7 +175,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int nthreads, typename T, typename Mask>
|
template <int nthreads, typename T, typename Mask>
|
||||||
__global__ void minMaxKernel(const DevMem2Db src, Mask mask, T* minval, T* maxval)
|
__global__ void minMaxKernel(const PtrStepSzb src, Mask mask, T* minval, T* maxval)
|
||||||
{
|
{
|
||||||
typedef typename MinMaxTypeTraits<T>::best_type best_type;
|
typedef typename MinMaxTypeTraits<T>::best_type best_type;
|
||||||
__shared__ best_type sminval[nthreads];
|
__shared__ best_type sminval[nthreads];
|
||||||
@ -258,7 +258,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
|
void minMaxMaskCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -279,17 +279,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
*maxval = maxval_;
|
*maxval = maxval_;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxMaskCaller<uchar>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<uchar>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<char>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<char>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<ushort>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<ushort>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<short>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<short>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<int>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<int>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<float>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<float>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskCaller<double>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskCaller<double>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf)
|
void minMaxCaller(const PtrStepSzb src, double* minval, double* maxval, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -310,13 +310,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
*maxval = maxval_;
|
*maxval = maxval_;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxCaller<uchar>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<uchar>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxCaller<char>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<char>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxCaller<ushort>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<ushort>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxCaller<short>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<short>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxCaller<int>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<int>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxCaller<float>(const DevMem2Db, double*,double*, PtrStepb);
|
template void minMaxCaller<float>(const PtrStepSzb, double*,double*, PtrStepb);
|
||||||
template void minMaxCaller<double>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxCaller<double>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <int nthreads, typename T>
|
template <int nthreads, typename T>
|
||||||
@ -344,7 +344,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
|
void minMaxMaskMultipassCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -367,16 +367,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
*maxval = maxval_;
|
*maxval = maxval_;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxMaskMultipassCaller<uchar>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<uchar>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskMultipassCaller<char>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<char>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskMultipassCaller<ushort>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<ushort>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskMultipassCaller<short>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<short>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskMultipassCaller<int>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<int>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
template void minMaxMaskMultipassCaller<float>(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
template void minMaxMaskMultipassCaller<float>(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMultipassCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf)
|
void minMaxMultipassCaller(const PtrStepSzb src, double* minval, double* maxval, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -399,12 +399,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
*maxval = maxval_;
|
*maxval = maxval_;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxMultipassCaller<uchar>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<uchar>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<char>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<char>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<ushort>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<ushort>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<short>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<short>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<int>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<int>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<float>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<float>(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
} // namespace minmax
|
} // namespace minmax
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -493,7 +493,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int nthreads, typename T, typename Mask>
|
template <int nthreads, typename T, typename Mask>
|
||||||
__global__ void minMaxLocKernel(const DevMem2Db src, Mask mask, T* minval, T* maxval,
|
__global__ void minMaxLocKernel(const PtrStepSzb src, Mask mask, T* minval, T* maxval,
|
||||||
uint* minloc, uint* maxloc)
|
uint* minloc, uint* maxloc)
|
||||||
{
|
{
|
||||||
typedef typename MinMaxTypeTraits<T>::best_type best_type;
|
typedef typename MinMaxTypeTraits<T>::best_type best_type;
|
||||||
@ -586,7 +586,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
void minMaxLocMaskCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -617,17 +617,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxLocMaskCaller<uchar>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<uchar>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<char>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<char>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<ushort>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<ushort>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<short>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<short>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<int>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<int>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<float>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<float>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskCaller<double>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskCaller<double>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocCaller(const DevMem2Db src, double* minval, double* maxval,
|
void minMaxLocCaller(const PtrStepSzb src, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -658,13 +658,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxLocCaller<uchar>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<uchar>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<char>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<char>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<ushort>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<ushort>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<short>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<int>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<float>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocCaller<double>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocCaller<double>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
// This kernel will be used only when compute capability is 1.0
|
// This kernel will be used only when compute capability is 1.0
|
||||||
@ -699,7 +699,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
void minMaxLocMaskMultipassCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -732,16 +732,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxLocMaskMultipassCaller<uchar>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<uchar>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskMultipassCaller<char>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<char>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskMultipassCaller<ushort>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<ushort>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskMultipassCaller<short>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<short>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskMultipassCaller<int>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<int>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMaskMultipassCaller<float>(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMaskMultipassCaller<float>(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMultipassCaller(const DevMem2Db src, double* minval, double* maxval,
|
void minMaxLocMultipassCaller(const PtrStepSzb src, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
int minloc[2], int maxloc[2], PtrStepb valbuf, PtrStepb locbuf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -774,12 +774,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
||||||
}
|
}
|
||||||
|
|
||||||
template void minMaxLocMultipassCaller<uchar>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<uchar>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<char>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<char>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<ushort>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<ushort>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<short>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<int>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<float>(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
} // namespace minmaxloc
|
} // namespace minmaxloc
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -820,7 +820,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <int nthreads, typename T>
|
template <int nthreads, typename T>
|
||||||
__global__ void countNonZeroKernel(const DevMem2Db src, volatile uint* count)
|
__global__ void countNonZeroKernel(const PtrStepSzb src, volatile uint* count)
|
||||||
{
|
{
|
||||||
__shared__ uint scount[nthreads];
|
__shared__ uint scount[nthreads];
|
||||||
|
|
||||||
@ -875,7 +875,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int countNonZeroCaller(const DevMem2Db src, PtrStepb buf)
|
int countNonZeroCaller(const PtrStepSzb src, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -894,13 +894,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
template int countNonZeroCaller<uchar>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<uchar>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<char>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<char>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<ushort>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<ushort>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<short>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<short>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<int>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<int>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<float>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<float>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroCaller<double>(const DevMem2Db, PtrStepb);
|
template int countNonZeroCaller<double>(const PtrStepSzb, PtrStepb);
|
||||||
|
|
||||||
|
|
||||||
template <int nthreads, typename T>
|
template <int nthreads, typename T>
|
||||||
@ -920,7 +920,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf)
|
int countNonZeroMultipassCaller(const PtrStepSzb src, PtrStepb buf)
|
||||||
{
|
{
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
estimateThreadCfg(src.cols, src.rows, threads, grid);
|
||||||
@ -941,12 +941,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
template int countNonZeroMultipassCaller<uchar>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<uchar>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroMultipassCaller<char>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<char>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroMultipassCaller<ushort>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<ushort>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroMultipassCaller<short>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<short>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroMultipassCaller<int>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<int>(const PtrStepSzb, PtrStepb);
|
||||||
template int countNonZeroMultipassCaller<float>(const DevMem2Db, PtrStepb);
|
template int countNonZeroMultipassCaller<float>(const PtrStepSzb, PtrStepb);
|
||||||
|
|
||||||
} // namespace countnonzero
|
} // namespace countnonzero
|
||||||
|
|
||||||
@ -1012,7 +1012,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename R, typename Op, int nthreads>
|
template <typename T, typename R, typename Op, int nthreads>
|
||||||
__global__ void sumKernel(const DevMem2Db src, R* result)
|
__global__ void sumKernel(const PtrStepSzb src, R* result)
|
||||||
{
|
{
|
||||||
__shared__ R smem[nthreads];
|
__shared__ R smem[nthreads];
|
||||||
|
|
||||||
@ -1084,7 +1084,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T, typename R, typename Op, int nthreads>
|
template <typename T, typename R, typename Op, int nthreads>
|
||||||
__global__ void sumKernel_C2(const DevMem2Db src, typename TypeVec<R, 2>::vec_type* result)
|
__global__ void sumKernel_C2(const PtrStepSzb src, typename TypeVec<R, 2>::vec_type* result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, 2>::vec_type SrcType;
|
typedef typename TypeVec<T, 2>::vec_type SrcType;
|
||||||
typedef typename TypeVec<R, 2>::vec_type DstType;
|
typedef typename TypeVec<R, 2>::vec_type DstType;
|
||||||
@ -1189,7 +1189,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T, typename R, typename Op, int nthreads>
|
template <typename T, typename R, typename Op, int nthreads>
|
||||||
__global__ void sumKernel_C3(const DevMem2Db src, typename TypeVec<R, 3>::vec_type* result)
|
__global__ void sumKernel_C3(const PtrStepSzb src, typename TypeVec<R, 3>::vec_type* result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, 3>::vec_type SrcType;
|
typedef typename TypeVec<T, 3>::vec_type SrcType;
|
||||||
typedef typename TypeVec<R, 3>::vec_type DstType;
|
typedef typename TypeVec<R, 3>::vec_type DstType;
|
||||||
@ -1303,7 +1303,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename R, typename Op, int nthreads>
|
template <typename T, typename R, typename Op, int nthreads>
|
||||||
__global__ void sumKernel_C4(const DevMem2Db src, typename TypeVec<R, 4>::vec_type* result)
|
__global__ void sumKernel_C4(const PtrStepSzb src, typename TypeVec<R, 4>::vec_type* result)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<T, 4>::vec_type SrcType;
|
typedef typename TypeVec<T, 4>::vec_type SrcType;
|
||||||
typedef typename TypeVec<R, 4>::vec_type DstType;
|
typedef typename TypeVec<R, 4>::vec_type DstType;
|
||||||
@ -1428,7 +1428,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
@ -1490,16 +1490,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void sumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumMultipassCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumMultipassCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumMultipassCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumMultipassCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumMultipassCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
@ -1539,16 +1539,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void sumCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void sumCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void absSumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
@ -1610,16 +1610,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void absSumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumMultipassCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumMultipassCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumMultipassCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumMultipassCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumMultipassCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void absSumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
@ -1659,16 +1659,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void absSumCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void absSumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void absSumCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sqrSumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
@ -1730,16 +1730,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void sqrSumMultipassCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumMultipassCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumMultipassCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumMultipassCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumMultipassCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumMultipassCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumMultipassCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sqrSumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
typedef double R;
|
typedef double R;
|
||||||
|
|
||||||
@ -1779,12 +1779,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
sum[3] = result[3];
|
sum[3] = result[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
template void sqrSumCaller<uchar>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<uchar>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<char>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<char>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<ushort>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<ushort>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<short>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<int>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<float>(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
} // namespace sum
|
} // namespace sum
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
@ -1882,7 +1882,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Op, typename T, typename S, typename D> __global__ void reduceRows(const DevMem2D_<T> src, D* dst, const Op op)
|
template <class Op, typename T, typename S, typename D> __global__ void reduceRows(const PtrStepSz<T> src, D* dst, const Op op)
|
||||||
{
|
{
|
||||||
__shared__ S smem[16 * 16];
|
__shared__ S smem[16 * 16];
|
||||||
|
|
||||||
@ -1913,7 +1913,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
dst[x] = saturate_cast<D>(op.result(smem[threadIdx.x * 16], src.rows));
|
dst[x] = saturate_cast<D>(op.result(smem[threadIdx.x * 16], src.rows));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename> class Op, typename T, typename S, typename D> void reduceRows_caller(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream)
|
template <template <typename> class Op, typename T, typename S, typename D> void reduceRows_caller(const PtrStepSz<T>& src, PtrStepSz<D> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(16, 16);
|
const dim3 block(16, 16);
|
||||||
const dim3 grid(divUp(src.cols, block.x));
|
const dim3 grid(divUp(src.cols, block.x));
|
||||||
@ -1927,9 +1927,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream)
|
template <typename T, typename S, typename D> void reduceRows_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSz<T>& src, PtrStepSz<D> dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -1939,29 +1939,29 @@ namespace cv { namespace gpu { namespace device
|
|||||||
reduceRows_caller<MinReductor, T, S, D>
|
reduceRows_caller<MinReductor, T, S, D>
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[reduceOp](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<D> >(dst), stream);
|
callers[reduceOp](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<D> >(dst), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void reduceRows_gpu<uchar, int, uchar>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<uchar, int, uchar>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<uchar, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<uchar, int, int>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<uchar, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<uchar, int, float>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceRows_gpu<ushort, int, ushort>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<ushort, int, ushort>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<ushort, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<ushort, int, int>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<ushort, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<ushort, int, float>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceRows_gpu<short, int, short>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<short, int, short>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<short, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<short, int, int>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<short, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<short, int, float>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceRows_gpu<int, int, int>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<int, int, int>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceRows_gpu<int, int, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<int, int, float>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceRows_gpu<float, float, float>(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceRows_gpu<float, float, float>(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <int cn, class Op, typename T, typename S, typename D> __global__ void reduceCols(const DevMem2D_<T> src, D* dst, const Op op)
|
template <int cn, class Op, typename T, typename S, typename D> __global__ void reduceCols(const PtrStepSz<T> src, D* dst, const Op op)
|
||||||
{
|
{
|
||||||
__shared__ S smem[256 * cn];
|
__shared__ S smem[256 * cn];
|
||||||
|
|
||||||
@ -2050,7 +2050,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
dst[y * cn + threadIdx.x] = saturate_cast<D>(op.result(smem[threadIdx.x * 256], src.cols));
|
dst[y * cn + threadIdx.x] = saturate_cast<D>(op.result(smem[threadIdx.x * 256], src.cols));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int cn, template <typename> class Op, typename T, typename S, typename D> void reduceCols_caller(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream)
|
template <int cn, template <typename> class Op, typename T, typename S, typename D> void reduceCols_caller(const PtrStepSz<T>& src, PtrStepSz<D> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(256);
|
const dim3 block(256);
|
||||||
const dim3 grid(src.rows);
|
const dim3 grid(src.rows);
|
||||||
@ -2064,9 +2064,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream)
|
template <typename T, typename S, typename D> void reduceCols_gpu(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2D_<T>& src, DevMem2D_<D> dst, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSz<T>& src, PtrStepSz<D> dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[4][4] =
|
static const caller_t callers[4][4] =
|
||||||
{
|
{
|
||||||
@ -2076,24 +2076,24 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{reduceCols_caller<4, SumReductor, T, S, D>, reduceCols_caller<4, AvgReductor, T, S, D>, reduceCols_caller<4, MaxReductor, T, S, D>, reduceCols_caller<4, MinReductor, T, S, D>},
|
{reduceCols_caller<4, SumReductor, T, S, D>, reduceCols_caller<4, AvgReductor, T, S, D>, reduceCols_caller<4, MaxReductor, T, S, D>, reduceCols_caller<4, MinReductor, T, S, D>},
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[cn - 1][reduceOp](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<D> >(dst), stream);
|
callers[cn - 1][reduceOp](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<D> >(dst), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void reduceCols_gpu<uchar, int, uchar>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<uchar, int, uchar>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<uchar, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<uchar, int, int>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<uchar, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<uchar, int, float>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceCols_gpu<ushort, int, ushort>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<ushort, int, ushort>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<ushort, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<ushort, int, int>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<ushort, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<ushort, int, float>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceCols_gpu<short, int, short>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<short, int, short>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<short, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<short, int, int>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<short, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<short, int, float>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceCols_gpu<int, int, int>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<int, int, int>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<int, int, float>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<float, float, float>(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
} // namespace matrix_reductions
|
} // namespace matrix_reductions
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -49,7 +49,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#define NEEDLE_MAP_SCALE 16
|
#define NEEDLE_MAP_SCALE 16
|
||||||
#define NUM_VERTS_PER_ARROW 6
|
#define NUM_VERTS_PER_ARROW 6
|
||||||
|
|
||||||
__global__ void NeedleMapAverageKernel(const DevMem2Df u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
|
__global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg)
|
||||||
{
|
{
|
||||||
__shared__ float smem[2 * NEEDLE_MAP_SCALE];
|
__shared__ float smem[2 * NEEDLE_MAP_SCALE];
|
||||||
|
|
||||||
@ -111,7 +111,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg)
|
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg)
|
||||||
{
|
{
|
||||||
const dim3 block(NEEDLE_MAP_SCALE);
|
const dim3 block(NEEDLE_MAP_SCALE);
|
||||||
const dim3 grid(u_avg.cols, u_avg.rows);
|
const dim3 grid(u_avg.cols, u_avg.rows);
|
||||||
@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void NeedleMapVertexKernel(const DevMem2Df u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
|
__global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale)
|
||||||
{
|
{
|
||||||
// test - just draw a triangle at each pixel
|
// test - just draw a triangle at each pixel
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -202,7 +202,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CreateOpticalFlowNeedleMap_gpu(DevMem2Df u_avg, DevMem2Df v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
|
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale)
|
||||||
{
|
{
|
||||||
const dim3 block(16);
|
const dim3 block(16);
|
||||||
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));
|
const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y));
|
||||||
|
@ -135,7 +135,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void polynomialExpansionGpu(const DevMem2Df &src, int polyN, DevMem2Df dst, cudaStream_t stream)
|
void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(256);
|
dim3 block(256);
|
||||||
dim3 grid(divUp(src.cols, block.x - 2*polyN), src.rows);
|
dim3 grid(divUp(src.cols, block.x - 2*polyN), src.rows);
|
||||||
@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
|
|
||||||
|
|
||||||
void updateMatricesGpu(
|
void updateMatricesGpu(
|
||||||
const DevMem2Df flowx, const DevMem2Df flowy, const DevMem2Df R0, const DevMem2Df R1,
|
const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
|
||||||
DevMem2Df M, cudaStream_t stream)
|
PtrStepSzf M, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
||||||
@ -288,7 +288,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void updateFlowGpu(const DevMem2Df M, DevMem2Df flowx, DevMem2Df flowy, cudaStream_t stream)
|
void updateFlowGpu(const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
dim3 grid(divUp(flowx.cols, block.x), divUp(flowx.rows, block.y));
|
||||||
@ -340,7 +340,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void boxFilterGpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
|
void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(256);
|
dim3 block(256);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
@ -414,7 +414,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void boxFilter5Gpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
|
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int height = src.rows / 5;
|
int height = src.rows / 5;
|
||||||
int width = src.cols;
|
int width = src.cols;
|
||||||
@ -433,7 +433,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void boxFilter5Gpu_CC11(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
|
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int height = src.rows / 5;
|
int height = src.rows / 5;
|
||||||
int width = src.cols;
|
int width = src.cols;
|
||||||
@ -501,7 +501,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
|
|
||||||
|
|
||||||
template <typename Border>
|
template <typename Border>
|
||||||
void gaussianBlurCaller(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
|
void gaussianBlurCaller(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int height = src.rows;
|
int height = src.rows;
|
||||||
int width = src.cols;
|
int width = src.cols;
|
||||||
@ -521,9 +521,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
|
|
||||||
|
|
||||||
void gaussianBlurGpu(
|
void gaussianBlurGpu(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
|
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -596,7 +596,7 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
|
|
||||||
template <typename Border, int blockDimX>
|
template <typename Border, int blockDimX>
|
||||||
void gaussianBlur5Caller(
|
void gaussianBlur5Caller(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream)
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int height = src.rows / 5;
|
int height = src.rows / 5;
|
||||||
int width = src.cols;
|
int width = src.cols;
|
||||||
@ -616,9 +616,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
|
|
||||||
|
|
||||||
void gaussianBlur5Gpu(
|
void gaussianBlur5Gpu(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
|
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -630,9 +630,9 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
}
|
}
|
||||||
|
|
||||||
void gaussianBlur5Gpu_CC11(
|
void gaussianBlur5Gpu_CC11(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderMode, cudaStream_t stream)
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderMode, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Df, int, DevMem2Df, cudaStream_t);
|
typedef void (*caller_t)(const PtrStepSzf, int, PtrStepSzf, cudaStream_t);
|
||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
|
@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HarrisResponses_gpu(DevMem2Db img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
|
void HarrisResponses_gpu(PtrStepSzb img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
|
|
||||||
@ -201,7 +201,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void IC_Angle_gpu(DevMem2Db image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
|
void IC_Angle_gpu(PtrStepSzb image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
|
|
||||||
|
@ -124,7 +124,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, template <typename> class B> void pyrDown_caller(DevMem2D_<T> src, DevMem2D_<T> dst, cudaStream_t stream)
|
template <typename T, template <typename> class B> void pyrDown_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(256);
|
const dim3 block(256);
|
||||||
const dim3 grid(divUp(src.cols, block.x), dst.rows);
|
const dim3 grid(divUp(src.cols, block.x), dst.rows);
|
||||||
@ -138,39 +138,39 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void pyrDown_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
pyrDown_caller<T, BrdReflect101>(static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(dst), stream);
|
pyrDown_caller<T, BrdReflect101>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void pyrDown_gpu<uchar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<uchar2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
//template void pyrDown_gpu<schar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<char2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<char3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<char4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrDown_gpu<ushort>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<ushort2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrDown_gpu<short>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<short2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<short3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<short4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
//template void pyrDown_gpu<int>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<int2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<int3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<int4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrDown_gpu<float>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrDown_gpu<float2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrDown_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrDown_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T> __global__ void pyrUp(const DevMem2D_<T> src, DevMem2D_<T> dst)
|
template <typename T> __global__ void pyrUp(const PtrStepSz<T> src, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
|
||||||
|
|
||||||
@ -142,7 +142,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
dst(y, x) = saturate_cast<T>(4.0f * sum);
|
dst(y, x) = saturate_cast<T>(4.0f * sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void pyrUp_caller(DevMem2D_<T> src, DevMem2D_<T> dst, cudaStream_t stream)
|
template <typename T> void pyrUp_caller(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(16, 16);
|
const dim3 block(16, 16);
|
||||||
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -154,39 +154,39 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void pyrUp_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
pyrUp_caller<T>(static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(dst), stream);
|
pyrUp_caller<T>(static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(dst), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void pyrUp_gpu<uchar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<uchar2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<uchar2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
//template void pyrUp_gpu<schar>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<char2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<char2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<char3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<char3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<char4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<char4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrUp_gpu<ushort>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<ushort2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<ushort2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrUp_gpu<short>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<short2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<short2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<short3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<short3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<short4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<short4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
//template void pyrUp_gpu<int>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<int2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<int2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<int3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<int3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<int4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<int4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void pyrUp_gpu<float>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
//template void pyrUp_gpu<float2>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
//template void pyrUp_gpu<float2>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void pyrUp_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -458,7 +458,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void lkSparse1_gpu(DevMem2Df I, DevMem2Df J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
void lkSparse1_gpu(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
int level, dim3 block, dim3 patch, cudaStream_t stream)
|
int level, dim3 block, dim3 patch, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
@ -480,7 +480,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
level, block, stream);
|
level, block, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lkSparse4_gpu(DevMem2D_<float4> I, DevMem2D_<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
void lkSparse4_gpu(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
int level, dim3 block, dim3 patch, cudaStream_t stream)
|
int level, dim3 block, dim3 patch, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
typedef void (*func_t)(int rows, int cols, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
@ -648,8 +648,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lkDense_gpu(DevMem2Db I, DevMem2Df J, DevMem2Df u, DevMem2Df v, DevMem2Df prevU, DevMem2Df prevV,
|
void lkDense_gpu(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
|
||||||
DevMem2Df err, int2 winSize, cudaStream_t stream)
|
PtrStepSzf err, int2 winSize, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(16, 16);
|
dim3 block(16, 16);
|
||||||
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
|
dim3 grid(divUp(I.cols, block.x), divUp(I.rows, block.y));
|
||||||
|
@ -51,7 +51,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
|
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
|
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int)
|
static void call(PtrStepSz<T> src, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst, const float* borderValue, int)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, PtrStepSz<T> dst, const float* borderValue, int)
|
||||||
{
|
{
|
||||||
(void)srcWhole;
|
(void)srcWhole;
|
||||||
(void)xoff;
|
(void)xoff;
|
||||||
@ -121,8 +121,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
|
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
|
||||||
{ \
|
{ \
|
||||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
|
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||||
DevMem2D_< type > dst, const float* borderValue, int cc) \
|
PtrStepSz< type > dst, const float* borderValue, int cc) \
|
||||||
{ \
|
{ \
|
||||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||||
dim3 block(32, cc >= 20 ? 8 : 4); \
|
dim3 block(32, cc >= 20 ? 8 : 4); \
|
||||||
@ -139,8 +139,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
|
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
|
||||||
{ \
|
{ \
|
||||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
|
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy, \
|
||||||
DevMem2D_< type > dst, const float*, int) \
|
PtrStepSz< type > dst, const float*, int) \
|
||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
@ -191,8 +191,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
|
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf mapx, PtrStepSzf mapy,
|
||||||
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
|
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
|
||||||
@ -201,11 +201,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
|
template <typename T> void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
|
||||||
DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
|
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap,
|
||||||
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
static const caller_t callers[3][5] =
|
static const caller_t callers[3][5] =
|
||||||
{
|
{
|
||||||
@ -232,38 +232,38 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap,
|
callers[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, xmap, ymap,
|
||||||
static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void remap_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void remap_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void remap_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void remap_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void remap_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void remap_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void remap_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -53,7 +53,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
|
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
|
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
|
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -96,7 +96,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
|
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
|
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -129,7 +129,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
|
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
(void)srcWhole;
|
(void)srcWhole;
|
||||||
(void)xoff;
|
(void)xoff;
|
||||||
@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
|
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
|
||||||
{ \
|
{ \
|
||||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
|
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
|
||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
@ -210,7 +210,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <template <typename> class Filter, typename T> struct ResizeDispatcher
|
template <template <typename> class Filter, typename T> struct ResizeDispatcher
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
|
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
|
||||||
@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T> struct ResizeDispatcher<AreaFilter, T>
|
template <typename T> struct ResizeDispatcher<AreaFilter, T>
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
(void)srcWhole;
|
(void)srcWhole;
|
||||||
(void)xoff;
|
(void)xoff;
|
||||||
@ -236,10 +236,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
|
template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
|
||||||
DevMem2Db dst, int interpolation, cudaStream_t stream)
|
PtrStepSzb dst, int interpolation, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[4] =
|
static const caller_t callers[4] =
|
||||||
{
|
{
|
||||||
@ -252,39 +252,39 @@ namespace cv { namespace gpu { namespace device
|
|||||||
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
|
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
|
||||||
interpolation = 1;
|
interpolation = 1;
|
||||||
|
|
||||||
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
|
callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
|
||||||
static_cast< DevMem2D_<T> >(dst), stream);
|
static_cast< PtrStepSz<T> >(dst), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
//template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
//template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
//template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
template<typename T> struct scan_traits{};
|
template<typename T> struct scan_traits{};
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
|
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void Gray_to_YV12(const DevMem2Db src, PtrStepb dst)
|
__global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
|
||||||
{
|
{
|
||||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
||||||
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
||||||
@ -96,7 +96,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void BGR_to_YV12(const DevMem2D_<T> src, PtrStepb dst)
|
__global__ void BGR_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
|
||||||
{
|
{
|
||||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
||||||
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
||||||
@ -132,7 +132,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
v_plane(y / 2, x / 2) = v_val;
|
v_plane(y / 2, x / 2) = v_val;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gray_to_YV12_caller(const DevMem2Db src, PtrStepb dst)
|
void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
||||||
@ -143,22 +143,22 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
template <int cn>
|
template <int cn>
|
||||||
void BGR_to_YV12_caller(const DevMem2Db src, PtrStepb dst)
|
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<uchar, cn>::vec_type src_t;
|
typedef typename TypeVec<uchar, cn>::vec_type src_t;
|
||||||
|
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
dim3 grid(divUp(src.cols, block.x * 2), divUp(src.rows, block.y * 2));
|
||||||
|
|
||||||
BGR_to_YV12<<<grid, block>>>(static_cast< DevMem2D_<src_t> >(src), dst);
|
BGR_to_YV12<<<grid, block>>>(static_cast< PtrStepSz<src_t> >(src), dst);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst)
|
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2Db src, PtrStepb dst);
|
typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst);
|
||||||
|
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
|
@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int KSIZE, typename T, typename D, typename B>
|
template <int KSIZE, typename T, typename D, typename B>
|
||||||
__global__ void linearRowFilter(const DevMem2D_<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
__global__ void linearRowFilter(const PtrStepSz<T> src, PtrStep<D> dst, const int anchor, const B brd)
|
||||||
{
|
{
|
||||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
|
||||||
const int BLOCK_DIM_X = 32;
|
const int BLOCK_DIM_X = 32;
|
||||||
@ -125,7 +125,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int KSIZE, typename T, typename D, template<typename> class B>
|
template <int KSIZE, typename T, typename D, template<typename> class B>
|
||||||
void linearRowFilter_caller(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream)
|
void linearRowFilter_caller(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int BLOCK_DIM_X;
|
int BLOCK_DIM_X;
|
||||||
int BLOCK_DIM_Y;
|
int BLOCK_DIM_Y;
|
||||||
@ -157,9 +157,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearRowFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<D> dst, int anchor, int cc, cudaStream_t stream);
|
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<D> dst, int anchor, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[5][33] =
|
static const caller_t callers[5][33] =
|
||||||
{
|
{
|
||||||
@ -342,13 +342,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
|
|
||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, cc, stream);
|
callers[brd_type][ksize]((PtrStepSz<T>)src, (PtrStepSz<D>)dst, anchor, cc, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void linearRowFilter_gpu<uchar , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearRowFilter_gpu<uchar , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<uchar4, float4>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearRowFilter_gpu<uchar4, float4>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<short3, float3>(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearRowFilter_gpu<short3, float3>(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<int , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearRowFilter_gpu<int , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<float , float >(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
template void linearRowFilter_gpu<float , float >(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
} // namespace row_filter
|
} // namespace row_filter
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -91,8 +91,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
//typedef double4 type3;
|
//typedef double4 type3;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*MergeFunction)(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream);
|
typedef void (*MergeFunction)(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream);
|
||||||
typedef void (*SplitFunction)(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream);
|
typedef void (*SplitFunction)(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream);
|
||||||
|
|
||||||
//------------------------------------------------------------
|
//------------------------------------------------------------
|
||||||
// Merge
|
// Merge
|
||||||
@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void mergeC2_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
|
static void mergeC2_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -242,7 +242,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void mergeC3_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
|
static void mergeC3_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -259,7 +259,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void mergeC4_(const DevMem2Db* src, DevMem2Db& dst, const cudaStream_t& stream)
|
static void mergeC4_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||||
@ -276,7 +276,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
|
void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst,
|
||||||
int total_channels, size_t elem_size,
|
int total_channels, size_t elem_size,
|
||||||
const cudaStream_t& stream)
|
const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
@ -435,7 +435,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void splitC2_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
|
static void splitC2_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void splitC3_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
|
static void splitC3_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
@ -468,7 +468,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void splitC4_(const DevMem2Db& src, DevMem2Db* dst, const cudaStream_t& stream)
|
static void splitC4_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
@ -485,7 +485,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream)
|
void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
static SplitFunction split_func_tbl[] =
|
static SplitFunction split_func_tbl[] =
|
||||||
{
|
{
|
||||||
|
@ -308,7 +308,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<int RADIUS> void kernel_caller(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, cudaStream_t & stream)
|
template<int RADIUS> void kernel_caller(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
dim3 grid(1,1,1);
|
dim3 grid(1,1,1);
|
||||||
dim3 threads(BLOCK_W, 1, 1);
|
dim3 threads(BLOCK_W, 1, 1);
|
||||||
@ -326,7 +326,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*kernel_caller_t)(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, cudaStream_t & stream);
|
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
|
||||||
|
|
||||||
const static kernel_caller_t callers[] =
|
const static kernel_caller_t callers[] =
|
||||||
{
|
{
|
||||||
@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
||||||
|
|
||||||
void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz >> 1;
|
int winsz2 = winsz >> 1;
|
||||||
|
|
||||||
@ -370,7 +370,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
||||||
|
|
||||||
__global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
|
__global__ void prefilter_kernel(PtrStepSzb output, int prefilterCap)
|
||||||
{
|
{
|
||||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -387,7 +387,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap, cudaStream_t & stream)
|
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
||||||
@ -446,7 +446,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
||||||
|
|
||||||
__global__ void textureness_kernel(DevMem2Db disp, int winsz, float threshold)
|
__global__ void textureness_kernel(PtrStepSzb disp, int winsz, float threshold)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz/2;
|
int winsz2 = winsz/2;
|
||||||
int n_dirty_pixels = (winsz2) * 2;
|
int n_dirty_pixels = (winsz2) * 2;
|
||||||
@ -505,7 +505,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream)
|
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
avgTexturenessThreshold *= winsz * winsz;
|
avgTexturenessThreshold *= winsz * winsz;
|
||||||
|
|
||||||
|
@ -128,7 +128,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <int cn, typename D>
|
template <int cn, typename D>
|
||||||
__global__ void comp_data(const DevMem2Db left, const PtrStepb right, PtrElemStep_<D> data)
|
__global__ void comp_data(const PtrStepSzb left, const PtrStepb right, PtrStep<D> data)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const uchar* rs = right.ptr(y) + x * cn;
|
const uchar* rs = right.ptr(y) + x * cn;
|
||||||
|
|
||||||
D* ds = data.ptr(y) + x;
|
D* ds = data.ptr(y) + x;
|
||||||
const size_t disp_step = data.step * left.rows;
|
const size_t disp_step = data.step * left.rows / PtrStep<D>::elem_size;
|
||||||
|
|
||||||
for (int disp = 0; disp < cndisp; disp++)
|
for (int disp = 0; disp < cndisp; disp++)
|
||||||
{
|
{
|
||||||
@ -159,9 +159,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename D>
|
template<typename T, typename D>
|
||||||
void comp_data_gpu(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);
|
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
|
|
||||||
template <> void comp_data_gpu<uchar, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -169,13 +169,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<short>)data);
|
comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
template <> void comp_data_gpu<uchar, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -183,14 +183,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<float>)data);
|
comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> void comp_data_gpu<uchar3, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar3, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -198,13 +198,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<short>)data);
|
comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
template <> void comp_data_gpu<uchar3, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar3, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -212,14 +212,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<float>)data);
|
comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> void comp_data_gpu<uchar4, short>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar4, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -227,13 +227,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<short>)data);
|
comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
template <> void comp_data_gpu<uchar4, float>(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream)
|
template <> void comp_data_gpu<uchar4, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -241,7 +241,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(left.cols, threads.x);
|
grid.x = divUp(left.cols, threads.x);
|
||||||
grid.y = divUp(left.rows, threads.y);
|
grid.y = divUp(left.rows, threads.y);
|
||||||
|
|
||||||
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (DevMem2D_<float>)data);
|
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream)
|
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -281,30 +281,30 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(dst_cols, threads.x);
|
grid.x = divUp(dst_cols, threads.x);
|
||||||
grid.y = divUp(dst_rows, threads.y);
|
grid.y = divUp(dst_rows, threads.y);
|
||||||
|
|
||||||
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)src, (DevMem2D_<T>)dst);
|
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////// level up messages ////////////////////////
|
/////////////////// level up messages ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrElemStep_<T> src, PtrElemStep_<T> dst)
|
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
if (x < dst_cols && y < dst_rows)
|
if (x < dst_cols && y < dst_rows)
|
||||||
{
|
{
|
||||||
const size_t dst_disp_step = dst.step * dst_rows;
|
const size_t dst_disp_step = dst.step * dst_rows / PtrStep<T>::elem_size;
|
||||||
const size_t src_disp_step = src.step * src_rows;
|
const size_t src_disp_step = src.step * src_rows / PtrStep<T>::elem_size;
|
||||||
|
|
||||||
T* dstr = dst.ptr(y ) + x;
|
T* dstr = dst.ptr(y ) + x;
|
||||||
const T* srcr = src.ptr(y/2) + x/2;
|
const T* srcr = src.ptr(y/2) + x/2;
|
||||||
@ -315,7 +315,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream)
|
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -325,24 +325,24 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
int src_idx = (dst_idx + 1) & 1;
|
int src_idx = (dst_idx + 1) & 1;
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mus[src_idx], (DevMem2D_<T>)mus[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mus[src_idx], (PtrStepSz<T>)mus[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mds[src_idx], (DevMem2D_<T>)mds[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mds[src_idx], (PtrStepSz<T>)mds[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mls[src_idx], (DevMem2D_<T>)mls[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mls[src_idx], (PtrStepSz<T>)mls[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mrs[src_idx], (DevMem2D_<T>)mrs[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mrs[src_idx], (PtrStepSz<T>)mrs[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);
|
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||||
template void level_up_messages_gpu<float>(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);
|
template void level_up_messages_gpu<float>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
//////////////////// calc all iterations /////////////////////
|
//////////////////// calc all iterations /////////////////////
|
||||||
@ -419,7 +419,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void one_iteration(int t, PtrElemStep_<T> u, T* d, T* l, T* r, const PtrElemStep_<T> data, int cols, int rows)
|
__global__ void one_iteration(int t, PtrStep<T> u, T* d, T* l, T* r, const PtrStep<T> data, int cols, int rows)
|
||||||
{
|
{
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
||||||
@ -427,13 +427,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
|
if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
|
||||||
{
|
{
|
||||||
T* us = u.ptr(y) + x;
|
T* us = u.ptr(y) + x;
|
||||||
T* ds = d + y * u.step + x;
|
T* ds = d + y * u.step/PtrStep<T>::elem_size + x;
|
||||||
T* ls = l + y * u.step + x;
|
T* ls = l + y * u.step/PtrStep<T>::elem_size + x;
|
||||||
T* rs = r + y * u.step + x;
|
T* rs = r + y * u.step/PtrStep<T>::elem_size + x;
|
||||||
const T* dt = data.ptr(y) + x;
|
const T* dt = data.ptr(y) + x;
|
||||||
|
|
||||||
size_t msg_disp_step = u.step * rows;
|
size_t msg_disp_step = u.step * rows;
|
||||||
size_t data_disp_step = data.step * rows;
|
size_t data_disp_step = data.step * rows / PtrStep<T>::elem_size;
|
||||||
|
|
||||||
message(us + u.step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step);
|
message(us + u.step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step);
|
||||||
message(ds - u.step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step);
|
message(ds - u.step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step);
|
||||||
@ -443,8 +443,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void calc_all_iterations_gpu(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d,
|
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
|
||||||
const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream)
|
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -454,7 +454,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
for(int t = 0; t < iters; ++t)
|
for(int t = 0; t < iters; ++t)
|
||||||
{
|
{
|
||||||
one_iteration<T><<<grid, threads, 0, stream>>>(t, (DevMem2D_<T>)u, (T*)d.data, (T*)l.data, (T*)r.data, (DevMem2D_<T>)data, cols, rows);
|
one_iteration<T><<<grid, threads, 0, stream>>>(t, (PtrStepSz<T>)u, (T*)d.data, (T*)l.data, (T*)r.data, (PtrStepSz<T>)data, cols, rows);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -462,16 +462,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void calc_all_iterations_gpu<short>(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);
|
template void calc_all_iterations_gpu<short>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
template void calc_all_iterations_gpu<float>(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);
|
template void calc_all_iterations_gpu<float>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////////// output ////////////////////////////
|
/////////////////////////// output ////////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void output(const PtrElemStep_<T> u, const T* d, const T* l, const T* r, const T* data,
|
__global__ void output(const PtrStep<T> u, const T* d, const T* l, const T* r, const T* data,
|
||||||
DevMem2D_<short> disp)
|
PtrStepSz<short> disp)
|
||||||
{
|
{
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
@ -479,12 +479,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
|
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
|
||||||
{
|
{
|
||||||
const T* us = u.ptr(y + 1) + x;
|
const T* us = u.ptr(y + 1) + x;
|
||||||
const T* ds = d + (y - 1) * u.step + x;
|
const T* ds = d + (y - 1) * u.step/PtrStep<T>::elem_size + x;
|
||||||
const T* ls = l + y * u.step + (x + 1);
|
const T* ls = l + y * u.step/PtrStep<T>::elem_size + (x + 1);
|
||||||
const T* rs = r + y * u.step + (x - 1);
|
const T* rs = r + y * u.step/PtrStep<T>::elem_size + (x - 1);
|
||||||
const T* dt = data + y * u.step + x;
|
const T* dt = data + y * u.step/PtrStep<T>::elem_size + x;
|
||||||
|
|
||||||
size_t disp_step = disp.rows * u.step;
|
size_t disp_step = disp.rows * u.step/PtrStep<T>::elem_size;
|
||||||
|
|
||||||
int best = 0;
|
int best = 0;
|
||||||
float best_val = numeric_limits<float>::max();
|
float best_val = numeric_limits<float>::max();
|
||||||
@ -508,8 +508,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
|
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
|
||||||
const DevMem2D_<short>& disp, cudaStream_t stream)
|
const PtrStepSz<short>& disp, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -517,14 +517,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
grid.x = divUp(disp.cols, threads.x);
|
grid.x = divUp(disp.cols, threads.x);
|
||||||
grid.y = divUp(disp.rows, threads.y);
|
grid.y = divUp(disp.rows, threads.y);
|
||||||
|
|
||||||
output<T><<<grid, threads, 0, stream>>>((DevMem2D_<T>)u, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
|
output<T><<<grid, threads, 0, stream>>>((PtrStepSz<T>)u, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void output_gpu<short>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||||
template void output_gpu<float>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
template void output_gpu<float>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||||
} // namespace stereobp
|
} // namespace stereobp
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -72,7 +72,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
|
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||||
const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp)
|
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
||||||
|
|
||||||
@ -860,7 +860,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream)
|
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
size_t disp_step = disp.rows * msg_step;
|
size_t disp_step = disp.rows * msg_step;
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||||
@ -880,9 +880,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
|
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||||
|
|
||||||
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
|
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||||
} // namespace stereocsbp
|
} // namespace stereocsbp
|
||||||
}}} // namespace cv { namespace gpu { namespace device {
|
}}} // namespace cv { namespace gpu { namespace device {
|
||||||
|
@ -102,15 +102,15 @@ namespace cv { namespace gpu { namespace device
|
|||||||
texture<unsigned int, 2, cudaReadModeElementType> sumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<unsigned int, 2, cudaReadModeElementType> sumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
texture<unsigned int, 2, cudaReadModeElementType> maskSumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<unsigned int, 2, cudaReadModeElementType> maskSumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
|
|
||||||
void bindImgTex(DevMem2Db img)
|
void bindImgTex(PtrStepSzb img)
|
||||||
{
|
{
|
||||||
bindTexture(&imgTex, img);
|
bindTexture(&imgTex, img);
|
||||||
}
|
}
|
||||||
void bindSumTex(DevMem2D_<uint> sum)
|
void bindSumTex(PtrStepSz<uint> sum)
|
||||||
{
|
{
|
||||||
bindTexture(&sumTex, sum);
|
bindTexture(&sumTex, sum);
|
||||||
}
|
}
|
||||||
void bindMaskSumTex(DevMem2D_<uint> maskSum)
|
void bindMaskSumTex(PtrStepSz<uint> maskSum)
|
||||||
{
|
{
|
||||||
bindTexture(&maskSumTex, maskSum);
|
bindTexture(&maskSumTex, maskSum);
|
||||||
}
|
}
|
||||||
@ -958,7 +958,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
descriptor_base[threadIdx.x] = lookup / len;
|
descriptor_base[threadIdx.x] = lookup / len;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
void compute_descriptors_gpu(const PtrStepSzf& descriptors,
|
||||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
|
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
|
||||||
{
|
{
|
||||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||||
|
@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
// Build Maps
|
// Build Maps
|
||||||
|
|
||||||
template <class Transform> __global__ void buildWarpMaps(DevMem2Df xmap, PtrStepf ymap)
|
template <class Transform> __global__ void buildWarpMaps(PtrStepSzf xmap, PtrStepf ymap)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -94,7 +94,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Transform> void buildWarpMaps_caller(DevMem2Df xmap, DevMem2Df ymap, cudaStream_t stream)
|
template <class Transform> void buildWarpMaps_caller(PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 block(32, 8);
|
dim3 block(32, 8);
|
||||||
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
|
dim3 grid(divUp(xmap.cols, block.x), divUp(xmap.rows, block.y));
|
||||||
@ -106,14 +106,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], DevMem2Df xmap, DevMem2Df ymap, cudaStream_t stream)
|
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||||
|
|
||||||
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
|
buildWarpMaps_caller<AffineTransform>(xmap, ymap, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], DevMem2Df xmap, DevMem2Df ymap, cudaStream_t stream)
|
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
// Warp
|
// Warp
|
||||||
|
|
||||||
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, DevMem2D_<T> dst)
|
template <class Transform, class Ptr2D, typename T> __global__ void warp(const Ptr2D src, PtrStepSz<T> dst)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -138,7 +138,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
|
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||||
|
|
||||||
@ -156,7 +156,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
|
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcherNonStream
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, int)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, int)
|
||||||
{
|
{
|
||||||
(void)xoff;
|
(void)xoff;
|
||||||
(void)yoff;
|
(void)yoff;
|
||||||
@ -193,7 +193,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
|
template <class Transform, template <typename> class Filter, template <typename> class B> struct WarpDispatcherNonStream<Transform, Filter, B, type> \
|
||||||
{ \
|
{ \
|
||||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2D_< type > dst, const float* borderValue, int cc) \
|
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float* borderValue, int cc) \
|
||||||
{ \
|
{ \
|
||||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||||
dim3 block(32, cc >= 20 ? 8 : 4); \
|
dim3 block(32, cc >= 20 ? 8 : 4); \
|
||||||
@ -210,7 +210,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}; \
|
}; \
|
||||||
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
|
template <class Transform, template <typename> class Filter> struct WarpDispatcherNonStream<Transform, Filter, BrdReplicate, type> \
|
||||||
{ \
|
{ \
|
||||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2D_< type > dst, const float*, int) \
|
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz< type > dst, const float*, int) \
|
||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
@ -261,7 +261,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
|
template <class Transform, template <typename> class Filter, template <typename> class B, typename T> struct WarpDispatcher
|
||||||
{
|
{
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
|
WarpDispatcherNonStream<Transform, Filter, B, T>::call(src, srcWhole, xoff, yoff, dst, borderValue, cc);
|
||||||
@ -271,10 +271,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <class Transform, typename T>
|
template <class Transform, typename T>
|
||||||
void warp_caller(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Db dst, int interpolation,
|
void warp_caller(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
typedef void (*func_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
static const func_t funcs[3][5] =
|
static const func_t funcs[3][5] =
|
||||||
{
|
{
|
||||||
@ -301,11 +301,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
funcs[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff,
|
funcs[interpolation][borderMode](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff,
|
||||||
static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
static_cast< PtrStepSz<T> >(dst), borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void warpAffine_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation,
|
template <typename T> void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 2 * 3 * sizeof(float)) );
|
||||||
@ -313,37 +313,37 @@ namespace cv { namespace gpu { namespace device
|
|||||||
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
warp_caller<AffineTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void warpAffine_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void warpAffine_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpAffine_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpAffine_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void warpAffine_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpAffine_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpAffine_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpAffine_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpAffine_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpAffine_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template <typename T> void warpPerspective_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation,
|
template <typename T> void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_warpMat, coeffs, 3 * 3 * sizeof(float)) );
|
||||||
@ -351,34 +351,34 @@ namespace cv { namespace gpu { namespace device
|
|||||||
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
warp_caller<PerspectiveTransform, T>(src, srcWhole, xoff, yoff, dst, interpolation, borderMode, borderValue, stream, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void warpPerspective_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void warpPerspective_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpPerspective_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpPerspective_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
//template void warpPerspective_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
template void warpPerspective_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
//template void warpPerspective_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void warpPerspective_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void warpPerspective_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void warpPerspective_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
} // namespace imgproc
|
} // namespace imgproc
|
||||||
}}} // namespace cv { namespace gpu { namespace device
|
}}} // namespace cv { namespace gpu { namespace device
|
||||||
|
@ -46,7 +46,7 @@
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
|
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
|
||||||
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
void name(const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
|
|
||||||
#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \
|
#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \
|
||||||
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
|
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
|
||||||
|
@ -116,7 +116,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppArithmFunc<DEPTH>::npp_t npp_t;
|
typedef typename NppArithmFunc<DEPTH>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -135,7 +135,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppArithmFunc<CV_32F>::npp_t npp_t;
|
typedef typename NppArithmFunc<CV_32F>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -189,7 +189,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -209,7 +209,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -228,7 +228,7 @@ namespace
|
|||||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||||
typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;
|
typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -251,7 +251,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -271,7 +271,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -290,7 +290,7 @@ namespace
|
|||||||
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
|
||||||
typedef typename NppTypeTraits<CV_32F>::npp_complex_type npp_complex_type;
|
typedef typename NppTypeTraits<CV_32F>::npp_complex_type npp_complex_type;
|
||||||
|
|
||||||
static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -316,17 +316,17 @@ namespace
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void add_gpu(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void add_gpu(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },
|
{add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },
|
||||||
@ -338,7 +338,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
|||||||
{0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }
|
{0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[] =
|
static const npp_func_t npp_funcs[] =
|
||||||
{
|
{
|
||||||
NppArithm<CV_8U , nppiAdd_8u_C1RSfs >::call,
|
NppArithm<CV_8U , nppiAdd_8u_C1RSfs >::call,
|
||||||
@ -384,7 +384,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },
|
{add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },
|
||||||
@ -396,7 +396,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
|||||||
{0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }
|
{0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[7][4] =
|
static const npp_func_t npp_funcs[7][4] =
|
||||||
{
|
{
|
||||||
{NppArithmScalar<CV_8U , 1, nppiAddC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiAddC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiAddC_8u_C4RSfs >::call},
|
{NppArithmScalar<CV_8U , 1, nppiAddC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiAddC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiAddC_8u_C4RSfs >::call},
|
||||||
@ -452,17 +452,17 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void subtract_gpu(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void subtract_gpu(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },
|
{subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },
|
||||||
@ -474,7 +474,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
|||||||
{0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }
|
{0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[6] =
|
static const npp_func_t npp_funcs[6] =
|
||||||
{
|
{
|
||||||
NppArithm<CV_8U , nppiSub_8u_C1RSfs>::call,
|
NppArithm<CV_8U , nppiSub_8u_C1RSfs>::call,
|
||||||
@ -520,7 +520,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },
|
{subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },
|
||||||
@ -532,7 +532,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
|||||||
{0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }
|
{0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[7][4] =
|
static const npp_func_t npp_funcs[7][4] =
|
||||||
{
|
{
|
||||||
{NppArithmScalar<CV_8U , 1, nppiSubC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiSubC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiSubC_8u_C4RSfs >::call},
|
{NppArithmScalar<CV_8U , 1, nppiSubC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiSubC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiSubC_8u_C4RSfs >::call},
|
||||||
@ -587,14 +587,14 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
void multiply_gpu(const PtrStepSz<uchar4>& src1, const PtrStepSzf& src2, const PtrStepSz<uchar4>& dst, cudaStream_t stream);
|
||||||
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
void multiply_gpu(const PtrStepSz<short4>& src1, const PtrStepSzf& src2, const PtrStepSz<short4>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void multiply_gpu(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void multiply_gpu(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
@ -609,7 +609,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
|
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
|
multiply_gpu(static_cast<PtrStepSz<uchar4> >(src1), static_cast<PtrStepSzf>(src2), static_cast<PtrStepSz<uchar4> >(dst), stream);
|
||||||
}
|
}
|
||||||
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
|
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
|
||||||
{
|
{
|
||||||
@ -617,11 +617,11 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
|
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
|
multiply_gpu(static_cast<PtrStepSz<short4> >(src1), static_cast<PtrStepSzf>(src2), static_cast<PtrStepSz<short4> >(dst), stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },
|
{multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },
|
||||||
@ -633,7 +633,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
{0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }
|
{0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[] =
|
static const npp_func_t npp_funcs[] =
|
||||||
{
|
{
|
||||||
NppArithm<CV_8U , nppiMul_8u_C1RSfs >::call,
|
NppArithm<CV_8U , nppiMul_8u_C1RSfs >::call,
|
||||||
@ -689,7 +689,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },
|
{multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },
|
||||||
@ -701,7 +701,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
|||||||
{0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }
|
{0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[7][4] =
|
static const npp_func_t npp_funcs[7][4] =
|
||||||
{
|
{
|
||||||
{NppArithmScalar<CV_8U , 1, nppiMulC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiMulC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiMulC_8u_C4RSfs >::call},
|
{NppArithmScalar<CV_8U , 1, nppiMulC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiMulC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiMulC_8u_C4RSfs >::call},
|
||||||
@ -755,17 +755,17 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
void divide_gpu(const PtrStepSz<uchar4>& src1, const PtrStepSzf& src2, const PtrStepSz<uchar4>& dst, cudaStream_t stream);
|
||||||
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
void divide_gpu(const PtrStepSz<short4>& src1, const PtrStepSzf& src2, const PtrStepSz<short4>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void divide_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void divide_gpu(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void divide_gpu(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
void divide_gpu(double scalar, const PtrStepSzb& src2, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
@ -780,7 +780,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
|||||||
|
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
|
divide_gpu(static_cast<PtrStepSz<uchar4> >(src1), static_cast<PtrStepSzf>(src2), static_cast<PtrStepSz<uchar4> >(dst), stream);
|
||||||
}
|
}
|
||||||
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
|
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
|
||||||
{
|
{
|
||||||
@ -788,11 +788,11 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
|||||||
|
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
|
divide_gpu(static_cast<PtrStepSz<short4> >(src1), static_cast<PtrStepSzf>(src2), static_cast<PtrStepSz<short4> >(dst), stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, const PtrStepSzb& src2, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
||||||
@ -804,7 +804,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
|||||||
{0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
|
{0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[6] =
|
static const npp_func_t npp_funcs[6] =
|
||||||
{
|
{
|
||||||
NppArithm<CV_8U , nppiDiv_8u_C1RSfs >::call,
|
NppArithm<CV_8U , nppiDiv_8u_C1RSfs >::call,
|
||||||
@ -848,7 +848,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, double val, const PtrStepSzb& dst, double scale, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
||||||
@ -860,7 +860,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
|
|||||||
{0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
|
{0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
||||||
static const npp_func_t npp_funcs[7][4] =
|
static const npp_func_t npp_funcs[7][4] =
|
||||||
{
|
{
|
||||||
{NppArithmScalar<CV_8U , 1, nppiDivC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiDivC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiDivC_8u_C4RSfs >::call},
|
{NppArithmScalar<CV_8U , 1, nppiDivC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiDivC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiDivC_8u_C4RSfs >::call},
|
||||||
@ -913,7 +913,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(double scalar, const PtrStepSzb& src2, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
static const func_t funcs[7][7] =
|
static const func_t funcs[7][7] =
|
||||||
{
|
{
|
||||||
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
|
||||||
@ -955,10 +955,10 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absdiff_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
void absdiff_gpu(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absdiff_gpu(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
|
void absdiff_gpu(const PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -974,7 +974,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppAbsDiffFunc<DEPTH>::npp_t npp_t;
|
typedef typename NppAbsDiffFunc<DEPTH>::npp_t npp_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -1014,7 +1014,7 @@ namespace
|
|||||||
typedef typename NppAbsDiffCFunc<DEPTH>::npp_t npp_t;
|
typedef typename NppAbsDiffCFunc<DEPTH>::npp_t npp_t;
|
||||||
typedef typename NppAbsDiffCFunc<DEPTH>::scalar_t scalar_t;
|
typedef typename NppAbsDiffCFunc<DEPTH>::scalar_t scalar_t;
|
||||||
|
|
||||||
static void call(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream)
|
static void call(const PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -1035,7 +1035,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
NppAbsDiff<CV_8U, nppiAbsDiff_8u_C1R>::call,
|
NppAbsDiff<CV_8U, nppiAbsDiff_8u_C1R>::call,
|
||||||
@ -1065,7 +1065,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
|
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
|
||||||
@ -1366,24 +1366,24 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T> void compare_eq(DevMem2Db src1, DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_eq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_ne(DevMem2Db src1, DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_ne(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_lt(DevMem2Db src1, DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_lt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_le(DevMem2Db src1, DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_le(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void compare_eq(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_eq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_ne(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_ne(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_lt(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_lt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_le(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_le(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_gt(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_gt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_ge(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void compare_ge(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
|
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src1, DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[7][4] =
|
static const func_t funcs[7][4] =
|
||||||
{
|
{
|
||||||
{compare_eq<unsigned char> , compare_ne<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> },
|
{compare_eq<unsigned char> , compare_ne<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> },
|
||||||
@ -1439,7 +1439,7 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, int cn, double val[4], DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[7][6] =
|
static const func_t funcs[7][6] =
|
||||||
{
|
{
|
||||||
{compare_eq<unsigned char> , compare_gt<unsigned char> , compare_ge<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> , compare_ne<unsigned char> },
|
{compare_eq<unsigned char> , compare_gt<unsigned char> , compare_ge<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> , compare_ne<unsigned char> },
|
||||||
@ -1900,18 +1900,18 @@ void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& st
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T> void min_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void min_gpu(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void max_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void max_gpu(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> void min_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void min_gpu(const PtrStepSzb src, T val, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void max_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void max_gpu(const PtrStepSzb src, T val, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
min_gpu<unsigned char>,
|
min_gpu<unsigned char>,
|
||||||
@ -1941,7 +1941,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, const PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
max_gpu<unsigned char>,
|
max_gpu<unsigned char>,
|
||||||
@ -1969,12 +1969,12 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T> void minScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void minScalar(const PtrStepSzb src, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cv::gpu::device::min_gpu(src, saturate_cast<T>(val), dst, stream);
|
cv::gpu::device::min_gpu(src, saturate_cast<T>(val), dst, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void maxScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void maxScalar(const PtrStepSzb src, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cv::gpu::device::max_gpu(src, saturate_cast<T>(val), dst, stream);
|
cv::gpu::device::max_gpu(src, saturate_cast<T>(val), dst, stream);
|
||||||
}
|
}
|
||||||
@ -1982,7 +1982,7 @@ namespace
|
|||||||
|
|
||||||
void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
minScalar<unsigned char>,
|
minScalar<unsigned char>,
|
||||||
@ -2010,7 +2010,7 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|||||||
|
|
||||||
void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
maxScalar<unsigned char>,
|
maxScalar<unsigned char>,
|
||||||
@ -2042,7 +2042,7 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
|
void threshold_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, T thresh, T maxVal, int type, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -2109,14 +2109,14 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template<typename T> void pow_caller(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
|
template<typename T> void pow_caller(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
pow_caller<unsigned char>, pow_caller<signed char>,
|
pow_caller<unsigned char>, pow_caller<signed char>,
|
||||||
@ -2216,14 +2216,14 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T1, typename T2, typename D>
|
template <typename T1, typename T2, typename D>
|
||||||
void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
void addWeighted_gpu(const PtrStepSzb& src1, double alpha, const PtrStepSzb& src2, double beta, double gamma, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
|
void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace cv::gpu::device;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& src1, double alpha, const PtrStepSzb& src2, double beta, double gamma, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[7][7][7] =
|
static const func_t funcs[7][7][7] =
|
||||||
{
|
{
|
||||||
|
@ -113,8 +113,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace fast
|
namespace fast
|
||||||
{
|
{
|
||||||
int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold);
|
int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
|
||||||
int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response);
|
int nonmaxSupression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -138,7 +138,7 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
|
|||||||
score_.setTo(Scalar::all(0));
|
score_.setTo(Scalar::all(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSupression ? score_ : DevMem2Di(), threshold);
|
count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSupression ? score_ : PtrStepSzi(), threshold);
|
||||||
count_ = std::min(count_, maxKeypoints);
|
count_ = std::min(count_, maxKeypoints);
|
||||||
|
|
||||||
return count_;
|
return count_;
|
||||||
|
@ -336,7 +336,7 @@ namespace
|
|||||||
{
|
{
|
||||||
void calcDiffHistogram(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
|
void calcDiffHistogram(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::gpu::GpuMat& hist, cv::gpu::GpuMat& histBuf)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, unsigned int* hist0, unsigned int* hist1, unsigned int* hist2, unsigned int* partialBuf0, unsigned int* partialBuf1, unsigned int* partialBuf2, int cc, cudaStream_t stream);
|
||||||
static const func_t funcs[4][4] =
|
static const func_t funcs[4][4] =
|
||||||
{
|
{
|
||||||
{0,0,0,0},
|
{0,0,0,0},
|
||||||
@ -401,7 +401,7 @@ namespace
|
|||||||
|
|
||||||
void calcDiffThreshMask(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::Vec3d bestThres, cv::gpu::GpuMat& changeMask)
|
void calcDiffThreshMask(const cv::gpu::GpuMat& prevFrame, const cv::gpu::GpuMat& curFrame, cv::Vec3d bestThres, cv::gpu::GpuMat& changeMask)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, uchar3 bestThres, cv::gpu::DevMem2Db changeMask, cudaStream_t stream);
|
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, uchar3 bestThres, cv::gpu::PtrStepSzb changeMask, cudaStream_t stream);
|
||||||
static const func_t funcs[4][4] =
|
static const func_t funcs[4][4] =
|
||||||
{
|
{
|
||||||
{0,0,0,0},
|
{0,0,0,0},
|
||||||
@ -450,7 +450,7 @@ namespace
|
|||||||
cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& countBuf,
|
cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& countBuf,
|
||||||
const cv::gpu::FGDStatModel::Params& params, int out_cn)
|
const cv::gpu::FGDStatModel::Params& params, int out_cn)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd, cv::gpu::DevMem2Db foreground,
|
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd, cv::gpu::PtrStepSzb foreground,
|
||||||
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
int deltaC, int deltaCC, float alpha2, int N1c, int N1cc, cudaStream_t stream);
|
||||||
static const func_t funcs[4][4][4] =
|
static const func_t funcs[4][4][4] =
|
||||||
{
|
{
|
||||||
@ -602,8 +602,8 @@ namespace
|
|||||||
const cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& background,
|
const cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& background,
|
||||||
const cv::gpu::FGDStatModel::Params& params)
|
const cv::gpu::FGDStatModel::Params& params)
|
||||||
{
|
{
|
||||||
typedef void (*func_t)(cv::gpu::DevMem2Db prevFrame, cv::gpu::DevMem2Db curFrame, cv::gpu::DevMem2Db Ftd, cv::gpu::DevMem2Db Fbd,
|
typedef void (*func_t)(cv::gpu::PtrStepSzb prevFrame, cv::gpu::PtrStepSzb curFrame, cv::gpu::PtrStepSzb Ftd, cv::gpu::PtrStepSzb Fbd,
|
||||||
cv::gpu::DevMem2Db foreground, cv::gpu::DevMem2Db background,
|
cv::gpu::PtrStepSzb foreground, cv::gpu::PtrStepSzb background,
|
||||||
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
int deltaC, int deltaCC, float alpha1, float alpha2, float alpha3, int N1c, int N1cc, int N2c, int N2cc, float T, cudaStream_t stream);
|
||||||
static const func_t funcs[4][4][4] =
|
static const func_t funcs[4][4][4] =
|
||||||
{
|
{
|
||||||
|
@ -664,7 +664,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void filter2D_gpu(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
|
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream);
|
int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
@ -708,7 +708,7 @@ namespace
|
|||||||
nppFilter2D_t func;
|
nppFilter2D_t func;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*gpuFilter2D_t)(DevMem2Db srcWhole, int ofsX, int ofsY, DevMem2Db dst,
|
typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream);
|
int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
@ -833,13 +833,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace row_filter
|
namespace row_filter
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearRowFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
void linearRowFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace column_filter
|
namespace column_filter
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearColumnFilter_gpu(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
void linearColumnFilter_gpu(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -848,7 +848,7 @@ namespace
|
|||||||
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
|
typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI,
|
||||||
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
|
const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor);
|
||||||
|
|
||||||
typedef void (*gpuFilter1D_t)(DevMem2Db src, DevMem2Db dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
typedef void (*gpuFilter1D_t)(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream);
|
||||||
|
|
||||||
struct NppLinearRowFilter : public BaseRowFilter_GPU
|
struct NppLinearRowFilter : public BaseRowFilter_GPU
|
||||||
{
|
{
|
||||||
|
@ -56,8 +56,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace gfft
|
namespace gfft
|
||||||
{
|
{
|
||||||
int findCorners_gpu(DevMem2Df eig, float threshold, DevMem2Db mask, float2* corners, int max_count);
|
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count);
|
||||||
void sortCorners_gpu(DevMem2Df eig, float2* corners, int count);
|
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ namespace cv { namespace gpu { namespace device { namespace globmotion {
|
|||||||
|
|
||||||
void calcWobbleSuppressionMaps(
|
void calcWobbleSuppressionMaps(
|
||||||
int left, int idx, int right, int width, int height,
|
int left, int idx, int right, int width, int height,
|
||||||
const float *ml, const float *mr, DevMem2Df mapx, DevMem2Df mapy);
|
const float *ml, const float *mr, PtrStepSzf mapx, PtrStepSzf mapy);
|
||||||
|
|
||||||
}}}}
|
}}}}
|
||||||
|
|
||||||
|
@ -56,10 +56,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace ccl
|
namespace ccl
|
||||||
{
|
{
|
||||||
void labelComponents(const DevMem2D& edges, DevMem2Di comps, int flags, cudaStream_t stream);
|
void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -77,7 +77,7 @@ void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scal
|
|||||||
|
|
||||||
int depth = image.depth();
|
int depth = image.depth();
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t suppotLookup[8][4] =
|
static const func_t suppotLookup[8][4] =
|
||||||
{ // 1, 2, 3, 4
|
{ // 1, 2, 3, 4
|
||||||
|
@ -70,8 +70,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
int nblocks_win_x, int nblocks_win_y);
|
int nblocks_win_x, int nblocks_win_y);
|
||||||
|
|
||||||
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
|
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
|
||||||
int height, int width, const cv::gpu::DevMem2Df& grad,
|
int height, int width, const cv::gpu::PtrStepSzf& grad,
|
||||||
const cv::gpu::DevMem2Db& qangle, float sigma, float* block_hists);
|
const cv::gpu::PtrStepSzb& qangle, float sigma, float* block_hists);
|
||||||
|
|
||||||
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||||
int height, int width, float* block_hists, float threshold);
|
int height, int width, float* block_hists, float threshold);
|
||||||
@ -87,18 +87,18 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||||
cv::gpu::DevMem2Df descriptors);
|
cv::gpu::PtrStepSzf descriptors);
|
||||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||||
cv::gpu::DevMem2Df descriptors);
|
cv::gpu::PtrStepSzf descriptors);
|
||||||
|
|
||||||
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2Db& img,
|
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::PtrStepSzb& img,
|
||||||
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2Db qangle, bool correct_gamma);
|
float angle_scale, cv::gpu::PtrStepSzf grad, cv::gpu::PtrStepSzb qangle, bool correct_gamma);
|
||||||
void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::DevMem2Db& img,
|
void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::PtrStepSzb& img,
|
||||||
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2Db qangle, bool correct_gamma);
|
float angle_scale, cv::gpu::PtrStepSzf grad, cv::gpu::PtrStepSzb qangle, bool correct_gamma);
|
||||||
|
|
||||||
void resize_8UC1(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
void resize_8UC1(const cv::gpu::PtrStepSzb& src, cv::gpu::PtrStepSzb dst);
|
||||||
void resize_8UC4(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
void resize_8UC4(const cv::gpu::PtrStepSzb& src, cv::gpu::PtrStepSzb dst);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -58,13 +58,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace hough
|
namespace hough
|
||||||
{
|
{
|
||||||
int buildPointList_gpu(DevMem2Db src, unsigned int* list);
|
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
|
||||||
|
|
||||||
void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
|
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
|
||||||
int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
|
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
|
||||||
|
|
||||||
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, DevMem2Di accum, int minRadius, int maxRadius, float idp);
|
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
|
||||||
int buildCentersList_gpu(DevMem2Di accum, unsigned int* centers, int threshold);
|
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
|
||||||
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
|
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
|
||||||
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
|
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
|
||||||
}
|
}
|
||||||
|
@ -104,7 +104,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -140,7 +140,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -177,8 +177,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
|
||||||
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -191,7 +191,7 @@ namespace
|
|||||||
|
|
||||||
dst.create(src.size(), CV_8UC4);
|
dst.create(src.size(), CV_8UC4);
|
||||||
|
|
||||||
drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);
|
drawColorDisp_gpu((PtrStepSz<T>)src, dst, ndisp, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
|
typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
|
||||||
@ -214,7 +214,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void reprojectImageTo3D_gpu(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -222,7 +222,7 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q,
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db disp, DevMem2Db xyz, const float* q, cudaStream_t stream);
|
typedef void (*func_t)(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream);
|
||||||
static const func_t funcs[2][4] =
|
static const func_t funcs[2][4] =
|
||||||
{
|
{
|
||||||
{reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
|
{reprojectImageTo3D_gpu<uchar, float3>, 0, 0, reprojectImageTo3D_gpu<short, float3>},
|
||||||
@ -245,13 +245,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
|
template <typename T, int cn> void copyMakeBorder_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void copyMakeBorder_caller(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
|
template <typename T, int cn> void copyMakeBorder_caller(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
using namespace ::cv::gpu::device::imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
@ -326,7 +326,7 @@ void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);
|
||||||
static const caller_t callers[6][4] =
|
static const caller_t callers[6][4] =
|
||||||
{
|
{
|
||||||
{ copyMakeBorder_caller<uchar, 1> , 0/*copyMakeBorder_caller<uchar, 2>*/ , copyMakeBorder_caller<uchar, 3> , copyMakeBorder_caller<uchar, 4>},
|
{ copyMakeBorder_caller<uchar, 1> , 0/*copyMakeBorder_caller<uchar, 2>*/ , copyMakeBorder_caller<uchar, 3> , copyMakeBorder_caller<uchar, 4>},
|
||||||
@ -354,7 +354,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
|
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
@ -388,7 +388,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
@ -421,7 +421,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}
|
}
|
||||||
@ -537,7 +537,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void shfl_integral_gpu(DevMem2Db img, DevMem2D_<unsigned int> integral, cudaStream_t stream);
|
void shfl_integral_gpu(PtrStepSzb img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -652,7 +652,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void columnSum_32F(const DevMem2Db src, const DevMem2Db dst);
|
void columnSum_32F(const PtrStepSzb src, const PtrStepSzb dst);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -991,12 +991,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace hist
|
namespace hist
|
||||||
{
|
{
|
||||||
void histogram256_gpu(DevMem2Db src, int* hist, unsigned int* buf, cudaStream_t stream);
|
void histogram256_gpu(PtrStepSzb src, int* hist, unsigned int* buf, cudaStream_t stream);
|
||||||
|
|
||||||
const int PARTIAL_HISTOGRAM256_COUNT = 240;
|
const int PARTIAL_HISTOGRAM256_COUNT = 240;
|
||||||
const int HISTOGRAM256_BIN_COUNT = 256;
|
const int HISTOGRAM256_BIN_COUNT = 256;
|
||||||
|
|
||||||
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream);
|
void equalizeHist_gpu(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -1072,8 +1072,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void cornerHarris_gpu(int block_size, float k, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream);
|
void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
|
||||||
void cornerMinEigenVal_gpu(int block_size, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream);
|
void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -1195,9 +1195,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
|
||||||
|
|
||||||
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, PtrStepSz<cufftComplex> c, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -1206,7 +1206,7 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag
|
|||||||
(void)flags;
|
(void)flags;
|
||||||
using namespace ::cv::gpu::device::imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, PtrStepSz<cufftComplex>, cudaStream_t stream);
|
||||||
|
|
||||||
static Caller callers[] = { device::imgproc::mulSpectrums, device::imgproc::mulSpectrums_CONJ };
|
static Caller callers[] = { device::imgproc::mulSpectrums, device::imgproc::mulSpectrums_CONJ };
|
||||||
|
|
||||||
@ -1226,9 +1226,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
|
||||||
|
|
||||||
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, PtrStepSz<cufftComplex> c, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -1237,7 +1237,7 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c,
|
|||||||
(void)flags;
|
(void)flags;
|
||||||
using namespace ::cv::gpu::device::imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, PtrStepSz<cufftComplex>, cudaStream_t stream);
|
||||||
static Caller callers[] = { device::imgproc::mulAndScaleSpectrums, device::imgproc::mulAndScaleSpectrums_CONJ };
|
static Caller callers[] = { device::imgproc::mulAndScaleSpectrums, device::imgproc::mulAndScaleSpectrums_CONJ };
|
||||||
|
|
||||||
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
||||||
|
@ -56,85 +56,85 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace match_template
|
namespace match_template
|
||||||
{
|
{
|
||||||
void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,
|
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,
|
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
|
||||||
int cn, cudaStream_t stream);
|
int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);
|
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC2(
|
void matchTemplatePrepared_CCOFF_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
unsigned int templ_sum_r,
|
unsigned int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
unsigned int templ_sum_g,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC3(
|
void matchTemplatePrepared_CCOFF_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b,
|
const PtrStepSz<unsigned int> image_sum_b,
|
||||||
unsigned int templ_sum_r,
|
unsigned int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
unsigned int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
unsigned int templ_sum_b,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_8UC4(
|
void matchTemplatePrepared_CCOFF_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r,
|
const PtrStepSz<unsigned int> image_sum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g,
|
const PtrStepSz<unsigned int> image_sum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b,
|
const PtrStepSz<unsigned int> image_sum_b,
|
||||||
const DevMem2D_<unsigned int> image_sum_a,
|
const PtrStepSz<unsigned int> image_sum_a,
|
||||||
unsigned int templ_sum_r,
|
unsigned int templ_sum_r,
|
||||||
unsigned int templ_sum_g,
|
unsigned int templ_sum_g,
|
||||||
unsigned int templ_sum_b,
|
unsigned int templ_sum_b,
|
||||||
unsigned int templ_sum_a,
|
unsigned int templ_sum_a,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
|
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
void matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
int w, int h, const DevMem2D_<unsigned int> image_sum,
|
int w, int h, const PtrStepSz<unsigned int> image_sum,
|
||||||
const DevMem2D_<unsigned long long> image_sqsum,
|
const PtrStepSz<unsigned long long> image_sqsum,
|
||||||
unsigned int templ_sum, unsigned long long templ_sqsum,
|
unsigned int templ_sum, unsigned long long templ_sqsum,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
|
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
||||||
int w, int h,
|
int w, int h,
|
||||||
const DevMem2D_<unsigned int> image_sum_r, const DevMem2D_<unsigned long long> image_sqsum_r,
|
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
|
||||||
const DevMem2D_<unsigned int> image_sum_g, const DevMem2D_<unsigned long long> image_sqsum_g,
|
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
|
||||||
const DevMem2D_<unsigned int> image_sum_b, const DevMem2D_<unsigned long long> image_sqsum_b,
|
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
|
||||||
const DevMem2D_<unsigned int> image_sum_a, const DevMem2D_<unsigned long long> image_sqsum_a,
|
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
|
||||||
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
|
||||||
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
|
||||||
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
|
||||||
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
|
||||||
DevMem2Df result, cudaStream_t stream);
|
PtrStepSzf result, cudaStream_t stream);
|
||||||
|
|
||||||
void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
|
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
|
||||||
unsigned long long templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream);
|
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
|
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -211,22 +211,22 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace sum
|
namespace sum
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void absSumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void absSumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sqrSumCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sqrSumMultipassCaller(const PtrStepSzb src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
|
||||||
}
|
}
|
||||||
@ -244,7 +244,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::matrix_reductions::sum;
|
using namespace cv::gpu::device::matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[] =
|
static Caller multipass_callers[] =
|
||||||
{
|
{
|
||||||
@ -288,7 +288,7 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::matrix_reductions::sum;
|
using namespace cv::gpu::device::matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[] =
|
static Caller multipass_callers[] =
|
||||||
{
|
{
|
||||||
@ -333,7 +333,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::matrix_reductions::sum;
|
using namespace cv::gpu::device::matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const PtrStepSzb, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[] =
|
static Caller multipass_callers[] =
|
||||||
{
|
{
|
||||||
@ -378,16 +378,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf);
|
void minMaxCaller(const PtrStepSzb src, double* minval, double* maxval, PtrStepb buf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
void minMaxMaskCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMultipassCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf);
|
void minMaxMultipassCaller(const PtrStepSzb src, double* minval, double* maxval, PtrStepb buf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
void minMaxMaskMultipassCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -404,8 +404,8 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
|||||||
{
|
{
|
||||||
using namespace ::cv::gpu::device::matrix_reductions::minmax;
|
using namespace ::cv::gpu::device::matrix_reductions::minmax;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
|
typedef void (*Caller)(const PtrStepSzb, double*, double*, PtrStepb);
|
||||||
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
typedef void (*MaskedCaller)(const PtrStepSzb, const PtrStepb, double*, double*, PtrStepb);
|
||||||
|
|
||||||
static Caller multipass_callers[] =
|
static Caller multipass_callers[] =
|
||||||
{
|
{
|
||||||
@ -488,19 +488,19 @@ namespace cv { namespace gpu { namespace device
|
|||||||
int& b1rows, int& b2cols, int& b2rows);
|
int& b1rows, int& b2cols, int& b2rows);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocCaller(const DevMem2Db src, double* minval, double* maxval,
|
void minMaxLocCaller(const PtrStepSzb src, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
void minMaxLocMaskCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMultipassCaller(const DevMem2Db src, double* minval, double* maxval,
|
void minMaxLocMultipassCaller(const PtrStepSzb src, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
void minMaxLocMaskMultipassCaller(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -517,8 +517,8 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
|||||||
{
|
{
|
||||||
using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;
|
using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
typedef void (*Caller)(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
typedef void (*MaskedCaller)(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
static Caller multipass_callers[] =
|
static Caller multipass_callers[] =
|
||||||
{
|
{
|
||||||
@ -606,10 +606,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int countNonZeroCaller(const DevMem2Db src, PtrStepb buf);
|
int countNonZeroCaller(const PtrStepSzb src, PtrStepb buf);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
|
int countNonZeroMultipassCaller(const PtrStepSzb src, PtrStepb buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -625,7 +625,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
|||||||
{
|
{
|
||||||
using namespace ::cv::gpu::device::matrix_reductions::countnonzero;
|
using namespace ::cv::gpu::device::matrix_reductions::countnonzero;
|
||||||
|
|
||||||
typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);
|
typedef int (*Caller)(const PtrStepSzb src, PtrStepb buf);
|
||||||
|
|
||||||
static Caller multipass_callers[7] =
|
static Caller multipass_callers[7] =
|
||||||
{
|
{
|
||||||
@ -669,8 +669,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace matrix_reductions
|
namespace matrix_reductions
|
||||||
{
|
{
|
||||||
template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template <typename T, typename S, typename D> void reduceRows_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template <typename T, typename S, typename D> void reduceCols_gpu(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -689,7 +689,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
|
|||||||
|
|
||||||
if (dim == 0)
|
if (dim == 0)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb& src, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[6][6] =
|
static const caller_t callers[6][6] =
|
||||||
{
|
{
|
||||||
@ -752,7 +752,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
typedef void (*caller_t)(const PtrStepSzb& src, int cn, const PtrStepSzb& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
static const caller_t callers[6][6] =
|
static const caller_t callers[6][6] =
|
||||||
{
|
{
|
||||||
|
@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace pyramid
|
namespace pyramid
|
||||||
{
|
{
|
||||||
template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 bDim(16, 8);
|
dim3 bDim(16, 8);
|
||||||
dim3 gDim(divUp(src.cols, bDim.x), divUp(src.rows, bDim.y));
|
dim3 gDim(divUp(src.cols, bDim.x), divUp(src.rows, bDim.y));
|
||||||
@ -220,17 +220,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void kernelDownsampleX2_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void kernelDownsampleX2_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void kernelDownsampleX2_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelDownsampleX2_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelDownsampleX2_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -281,7 +281,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace pyramid
|
namespace pyramid
|
||||||
{
|
{
|
||||||
template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream)
|
template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 bDim(16, 8);
|
dim3 bDim(16, 8);
|
||||||
dim3 gDim(divUp(dst.cols, bDim.x), divUp(dst.rows, bDim.y));
|
dim3 gDim(divUp(dst.cols, bDim.x), divUp(dst.rows, bDim.y));
|
||||||
@ -295,17 +295,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
template void kernelInterpolateFrom1_gpu<uchar1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<uchar3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<uchar4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void kernelInterpolateFrom1_gpu<ushort1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<ushort3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<ushort4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template void kernelInterpolateFrom1_gpu<float1>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<float3>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template void kernelInterpolateFrom1_gpu<float4>(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template void kernelInterpolateFrom1_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@
|
|||||||
#define __OPENCV_GPU_COMMON_HPP__
|
#define __OPENCV_GPU_COMMON_HPP__
|
||||||
|
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include "opencv2/core/devmem2d.hpp"
|
#include "opencv2/core/cuda_devptrs.hpp"
|
||||||
|
|
||||||
#ifndef CV_PI
|
#ifndef CV_PI
|
||||||
#define CV_PI 3.1415926535897932384626433832795
|
#define CV_PI 3.1415926535897932384626433832795
|
||||||
@ -101,7 +101,7 @@ namespace cv { namespace gpu
|
|||||||
typedef signed char schar;
|
typedef signed char schar;
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
template<class T> inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
|
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||||
|
@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
__global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
|
__global__ static void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
|
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
|
||||||
@ -239,7 +239,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
|
static __global__ void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -251,7 +251,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
__global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
|
__global__ static void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
|
||||||
const Mask mask, const BinOp op)
|
const Mask mask, const BinOp op)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
@ -291,7 +291,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
|
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
|
||||||
const Mask mask, const BinOp op)
|
const Mask mask, const BinOp op)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
@ -309,7 +309,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<> struct TransformDispatcher<false>
|
template<> struct TransformDispatcher<false>
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
|
|
||||||
@ -324,7 +324,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static void call(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
|
|
||||||
@ -341,7 +341,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<> struct TransformDispatcher<true>
|
template<> struct TransformDispatcher<true>
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static void call(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
|
|
||||||
@ -365,7 +365,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static void call(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
|
|
||||||
|
@ -50,14 +50,14 @@
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
|
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
|
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||||
|
@ -193,8 +193,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace optical_flow
|
namespace optical_flow
|
||||||
{
|
{
|
||||||
void NeedleMapAverage_gpu(DevMem2Df u, DevMem2Df v, DevMem2Df u_avg, DevMem2Df v_avg);
|
void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg);
|
||||||
void CreateOpticalFlowNeedleMap_gpu(DevMem2Df u_avg, DevMem2Df v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
|
void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -66,33 +66,33 @@ namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
|||||||
int polyN, const float *g, const float *xg, const float *xxg,
|
int polyN, const float *g, const float *xg, const float *xxg,
|
||||||
float ig11, float ig03, float ig33, float ig55);
|
float ig11, float ig03, float ig33, float ig55);
|
||||||
|
|
||||||
void polynomialExpansionGpu(const DevMem2Df &src, int polyN, DevMem2Df dst, cudaStream_t stream);
|
void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream);
|
||||||
|
|
||||||
void setUpdateMatricesConsts();
|
void setUpdateMatricesConsts();
|
||||||
|
|
||||||
void updateMatricesGpu(
|
void updateMatricesGpu(
|
||||||
const DevMem2Df flowx, const DevMem2Df flowy, const DevMem2Df R0, const DevMem2Df R1,
|
const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,
|
||||||
DevMem2Df M, cudaStream_t stream);
|
PtrStepSzf M, cudaStream_t stream);
|
||||||
|
|
||||||
void updateFlowGpu(
|
void updateFlowGpu(
|
||||||
const DevMem2Df M, DevMem2Df flowx, DevMem2Df flowy, cudaStream_t stream);
|
const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);
|
||||||
|
|
||||||
/*void boxFilterGpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream);*/
|
/*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/
|
||||||
|
|
||||||
void boxFilter5Gpu(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream);
|
void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||||
|
|
||||||
void boxFilter5Gpu_CC11(const DevMem2Df src, int ksizeHalf, DevMem2Df dst, cudaStream_t stream);
|
void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);
|
||||||
|
|
||||||
void setGaussianBlurKernel(const float *gKer, int ksizeHalf);
|
void setGaussianBlurKernel(const float *gKer, int ksizeHalf);
|
||||||
|
|
||||||
void gaussianBlurGpu(
|
void gaussianBlurGpu(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderType, cudaStream_t stream);
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||||
|
|
||||||
void gaussianBlur5Gpu(
|
void gaussianBlur5Gpu(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderType, cudaStream_t stream);
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||||
|
|
||||||
void gaussianBlur5Gpu_CC11(
|
void gaussianBlur5Gpu_CC11(
|
||||||
const DevMem2Df src, int ksizeHalf, DevMem2Df dst, int borderType, cudaStream_t stream);
|
const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);
|
||||||
|
|
||||||
}}}} // namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
}}}} // namespace cv { namespace gpu { namespace device { namespace optflow_farneback
|
||||||
|
|
||||||
|
@ -69,11 +69,11 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
int cull_gpu(int* loc, float* response, int size, int n_points);
|
int cull_gpu(int* loc, float* response, int size, int n_points);
|
||||||
|
|
||||||
void HarrisResponses_gpu(DevMem2Db img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream);
|
void HarrisResponses_gpu(PtrStepSzb img, const short2* loc, float* response, const int npoints, int blockSize, float harris_k, cudaStream_t stream);
|
||||||
|
|
||||||
void loadUMax(const int* u_max, int count);
|
void loadUMax(const int* u_max, int count);
|
||||||
|
|
||||||
void IC_Angle_gpu(DevMem2Db image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream);
|
void IC_Angle_gpu(PtrStepSzb image, const short2* loc, float* angle, int npoints, int half_k, cudaStream_t stream);
|
||||||
|
|
||||||
void computeOrbDescriptor_gpu(PtrStepb img, const short2* loc, const float* angle, const int npoints,
|
void computeOrbDescriptor_gpu(PtrStepb img, const short2* loc, const float* angle, const int npoints,
|
||||||
const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize, int WTA_K, cudaStream_t stream);
|
const int* pattern_x, const int* pattern_y, PtrStepb desc, int dsize, int WTA_K, cudaStream_t stream);
|
||||||
|
@ -58,7 +58,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T> void pyrDown_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
@ -96,7 +96,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T> void pyrUp_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -104,7 +104,7 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
@ -134,8 +134,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace pyramid
|
namespace pyramid
|
||||||
{
|
{
|
||||||
template <typename T> void kernelDownsampleX2_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
template <typename T> void kernelInterpolateFrom1_gpu(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -143,7 +143,7 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::pyramid;
|
using namespace cv::gpu::device::pyramid;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
@ -191,7 +191,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::pyramid;
|
using namespace cv::gpu::device::pyramid;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
|
@ -61,13 +61,13 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
void loadConstants(int2 winSize, int iters);
|
void loadConstants(int2 winSize, int iters);
|
||||||
|
|
||||||
void lkSparse1_gpu(DevMem2Df I, DevMem2Df J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
void lkSparse1_gpu(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||||
void lkSparse4_gpu(DevMem2D_<float4> I, DevMem2D_<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
void lkSparse4_gpu(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
|
||||||
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
|
||||||
|
|
||||||
void lkDense_gpu(DevMem2Db I, DevMem2Df J, DevMem2Df u, DevMem2Df v, DevMem2Df prevU, DevMem2Df prevV,
|
void lkDense_gpu(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
|
||||||
DevMem2Df err, int2 winSize, cudaStream_t stream = 0);
|
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -221,7 +221,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
|
|||||||
int2 winSize2i = make_int2(winSize.width, winSize.height);
|
int2 winSize2i = make_int2(winSize.width, winSize.height);
|
||||||
loadConstants(winSize2i, iters);
|
loadConstants(winSize2i, iters);
|
||||||
|
|
||||||
DevMem2Df derr = err ? *err : DevMem2Df();
|
PtrStepSzf derr = err ? *err : PtrStepSzf();
|
||||||
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
|
|
||||||
@ -230,7 +230,7 @@ void cv::gpu::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextI
|
|||||||
int idx2 = (idx + 1) & 1;
|
int idx2 = (idx + 1) & 1;
|
||||||
|
|
||||||
lkDense_gpu(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
lkDense_gpu(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
|
||||||
level == 0 ? derr : DevMem2Df(), winSize2i);
|
level == 0 ? derr : PtrStepSzf(), winSize2i);
|
||||||
|
|
||||||
if (level > 0)
|
if (level > 0)
|
||||||
idx = idx2;
|
idx = idx2;
|
||||||
|
@ -53,7 +53,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst,
|
void remap_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst,
|
||||||
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -62,7 +62,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation,
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzf xmap, PtrStepSzf ymap, PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
@ -98,7 +98,7 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
|
|||||||
Point ofs;
|
Point ofs;
|
||||||
src.locateROI(wholeSize, ofs);
|
src.locateROI(wholeSize, ofs);
|
||||||
|
|
||||||
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
|
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, xmap, ymap,
|
||||||
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
|
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream), cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,8 +64,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
|
void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
|
||||||
DevMem2Db dst, int interpolation, cudaStream_t stream);
|
PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -139,7 +139,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
|
|||||||
{
|
{
|
||||||
using namespace ::cv::gpu::device::imgproc;
|
using namespace ::cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
{
|
{
|
||||||
@ -154,7 +154,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
|
|||||||
const func_t func = funcs[src.depth()][src.channels() - 1];
|
const func_t func = funcs[src.depth()][src.channels() - 1];
|
||||||
CV_Assert(func != 0);
|
CV_Assert(func != 0);
|
||||||
|
|
||||||
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
|
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
|
||||||
static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
|
static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,8 +59,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace split_merge
|
namespace split_merge
|
||||||
{
|
{
|
||||||
void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
|
void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
|
||||||
void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
|
void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -102,11 +102,11 @@ namespace
|
|||||||
{
|
{
|
||||||
dst.create(size, CV_MAKETYPE(depth, total_channels));
|
dst.create(size, CV_MAKETYPE(depth, total_channels));
|
||||||
|
|
||||||
DevMem2Db src_as_devmem[4];
|
PtrStepSzb src_as_devmem[4];
|
||||||
for(size_t i = 0; i < n; ++i)
|
for(size_t i = 0; i < n; ++i)
|
||||||
src_as_devmem[i] = src[i];
|
src_as_devmem[i] = src[i];
|
||||||
|
|
||||||
DevMem2Db dst_as_devmem(dst);
|
PtrStepSzb dst_as_devmem(dst);
|
||||||
merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
|
merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -137,11 +137,11 @@ namespace
|
|||||||
|
|
||||||
CV_Assert(num_channels <= 4);
|
CV_Assert(num_channels <= 4);
|
||||||
|
|
||||||
DevMem2Db dst_as_devmem[4];
|
PtrStepSzb dst_as_devmem[4];
|
||||||
for (int i = 0; i < num_channels; ++i)
|
for (int i = 0; i < num_channels; ++i)
|
||||||
dst_as_devmem[i] = dst[i];
|
dst_as_devmem[i] = dst[i];
|
||||||
|
|
||||||
DevMem2Db src_as_devmem(src);
|
PtrStepSzb src_as_devmem(src);
|
||||||
split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream);
|
split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,9 +59,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace stereobm
|
namespace stereobm
|
||||||
{
|
{
|
||||||
void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t & stream);
|
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
|
||||||
void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
||||||
void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream);
|
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -65,17 +65,17 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
|
||||||
template<typename T, typename D>
|
template<typename T, typename D>
|
||||||
void comp_data_gpu(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);
|
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);
|
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void calc_all_iterations_gpu(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d,
|
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
|
||||||
const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);
|
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
|
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
|
||||||
const DevMem2D_<short>& disp, cudaStream_t stream);
|
const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@ -137,7 +137,7 @@ namespace
|
|||||||
|
|
||||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*comp_data_t)(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);
|
typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
static const comp_data_t comp_data_callers[2][5] =
|
static const comp_data_t comp_data_callers[2][5] =
|
||||||
{
|
{
|
||||||
{0, comp_data_gpu<unsigned char, short>, 0, comp_data_gpu<uchar3, short>, comp_data_gpu<uchar4, short>},
|
{0, comp_data_gpu<unsigned char, short>, 0, comp_data_gpu<uchar3, short>, comp_data_gpu<uchar4, short>},
|
||||||
@ -253,25 +253,25 @@ namespace
|
|||||||
|
|
||||||
void calcBP(GpuMat& disp, Stream& stream)
|
void calcBP(GpuMat& disp, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||||
static const data_step_down_t data_step_down_callers[2] =
|
static const data_step_down_t data_step_down_callers[2] =
|
||||||
{
|
{
|
||||||
data_step_down_gpu<short>, data_step_down_gpu<float>
|
data_step_down_gpu<short>, data_step_down_gpu<float>
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Db* mus, DevMem2Db* mds, DevMem2Db* mls, DevMem2Db* mrs, cudaStream_t stream);
|
typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||||
static const level_up_messages_t level_up_messages_callers[2] =
|
static const level_up_messages_t level_up_messages_callers[2] =
|
||||||
{
|
{
|
||||||
level_up_messages_gpu<short>, level_up_messages_gpu<float>
|
level_up_messages_gpu<short>, level_up_messages_gpu<float>
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, cudaStream_t stream);
|
typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||||
static const calc_all_iterations_t calc_all_iterations_callers[2] =
|
static const calc_all_iterations_t calc_all_iterations_callers[2] =
|
||||||
{
|
{
|
||||||
calc_all_iterations_gpu<short>, calc_all_iterations_gpu<float>
|
calc_all_iterations_gpu<short>, calc_all_iterations_gpu<float>
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef void (*output_t)(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
typedef void (*output_t)(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||||
static const output_t output_callers[2] =
|
static const output_t output_callers[2] =
|
||||||
{
|
{
|
||||||
output_gpu<short>, output_gpu<float>
|
output_gpu<short>, output_gpu<float>
|
||||||
@ -291,10 +291,10 @@ namespace
|
|||||||
data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream);
|
data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
DevMem2Db mus[] = {u, u2};
|
PtrStepSzb mus[] = {u, u2};
|
||||||
DevMem2Db mds[] = {d, d2};
|
PtrStepSzb mds[] = {d, d2};
|
||||||
DevMem2Db mrs[] = {r, r2};
|
PtrStepSzb mrs[] = {r, r2};
|
||||||
DevMem2Db mls[] = {l, l2};
|
PtrStepSzb mls[] = {l, l2};
|
||||||
|
|
||||||
int mem_idx = (rthis.levels & 1) ? 0 : 1;
|
int mem_idx = (rthis.levels & 1) ? 0 : 1;
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
namespace stereocsbp
|
namespace stereocsbp
|
||||||
{
|
{
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||||
const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp);
|
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp);
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
|
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
|
||||||
@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -70,9 +70,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
||||||
void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
||||||
|
|
||||||
void bindImgTex(DevMem2Db img);
|
void bindImgTex(PtrStepSzb img);
|
||||||
void bindSumTex(DevMem2D_<unsigned int> sum);
|
void bindSumTex(PtrStepSz<unsigned int> sum);
|
||||||
void bindMaskSumTex(DevMem2D_<unsigned int> maskSum);
|
void bindMaskSumTex(PtrStepSz<unsigned int> maskSum);
|
||||||
|
|
||||||
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers);
|
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers);
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
|
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures);
|
||||||
|
|
||||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
void compute_descriptors_gpu(const PtrStepSzf& descriptors,
|
||||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
|
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
@ -132,7 +132,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
namespace video_decoding
|
namespace video_decoding
|
||||||
{
|
{
|
||||||
void loadHueCSC(float hueCSC[9]);
|
void loadHueCSC(float hueCSC[9]);
|
||||||
void NV12ToARGB_gpu(const PtrStepb decodedFrame, DevMem2D_<unsigned int> interopFrame, cudaStream_t stream = 0);
|
void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<unsigned int> interopFrame, cudaStream_t stream = 0);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -505,7 +505,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace video_encoding
|
namespace video_encoding
|
||||||
{
|
{
|
||||||
void YV12_gpu(const DevMem2Db src, int cn, DevMem2Db dst);
|
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
|
@ -57,16 +57,16 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace imgproc
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], DevMem2Df xmap, DevMem2Df ymap, cudaStream_t stream);
|
void buildWarpAffineMaps_gpu(float coeffs[2 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void warpAffine_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation,
|
void warpAffine_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], DevMem2Df xmap, DevMem2Df ymap, cudaStream_t stream);
|
void buildWarpPerspectiveMaps_gpu(float coeffs[3 * 3], PtrStepSzf xmap, PtrStepSzf ymap, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void warpPerspective_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[3 * 3], DevMem2Db dst, int interpolation,
|
void warpPerspective_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[3 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@ -275,7 +275,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation,
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
@ -314,7 +314,7 @@ void cv::gpu::warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsiz
|
|||||||
DeviceInfo info;
|
DeviceInfo info;
|
||||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||||
|
|
||||||
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
|
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
|
||||||
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
|
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -417,7 +417,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
|
|||||||
{
|
{
|
||||||
using namespace cv::gpu::device::imgproc;
|
using namespace cv::gpu::device::imgproc;
|
||||||
|
|
||||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float coeffs[2 * 3], DevMem2Db dst, int interpolation,
|
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float coeffs[2 * 3], PtrStepSzb dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
|
|
||||||
static const func_t funcs[6][4] =
|
static const func_t funcs[6][4] =
|
||||||
@ -456,7 +456,7 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
|
|||||||
DeviceInfo info;
|
DeviceInfo info;
|
||||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||||
|
|
||||||
func(src, DevMem2Db(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
|
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y, coeffs,
|
||||||
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
|
dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(s), cc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1201,7 +1201,7 @@ PARAM_TEST_CASE(HoughCircles, cv::gpu::DeviceInfo, cv::Size, UseRoi)
|
|||||||
dst.setTo(cv::Scalar::all(0));
|
dst.setTo(cv::Scalar::all(0));
|
||||||
|
|
||||||
for (size_t i = 0; i < circles.size(); ++i)
|
for (size_t i = 0; i < circles.size(); ++i)
|
||||||
cv::circle(dst, cv::Point(circles[i][0], circles[i][1]), circles[i][2], cv::Scalar::all(255), fill ? -1 : 1);
|
cv::circle(dst, cv::Point2f(circles[i][0], circles[i][1]), (int)circles[i][2], cv::Scalar::all(255), fill ? -1 : 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1220,10 +1220,10 @@ TEST_P(HoughCircles, Accuracy)
|
|||||||
const int votesThreshold = 20;
|
const int votesThreshold = 20;
|
||||||
|
|
||||||
std::vector<cv::Vec3f> circles_gold(4);
|
std::vector<cv::Vec3f> circles_gold(4);
|
||||||
circles_gold[0] = cv::Vec3f(20, 20, minRadius);
|
circles_gold[0] = cv::Vec3i(20, 20, minRadius);
|
||||||
circles_gold[1] = cv::Vec3f(90, 87, minRadius + 3);
|
circles_gold[1] = cv::Vec3i(90, 87, minRadius + 3);
|
||||||
circles_gold[2] = cv::Vec3f(30, 70, minRadius + 8);
|
circles_gold[2] = cv::Vec3i(30, 70, minRadius + 8);
|
||||||
circles_gold[3] = cv::Vec3f(80, 10, maxRadius);
|
circles_gold[3] = cv::Vec3i(80, 10, maxRadius);
|
||||||
|
|
||||||
cv::Mat src(size, CV_8UC1);
|
cv::Mat src(size, CV_8UC1);
|
||||||
drawCircles(src, circles_gold, true);
|
drawCircles(src, circles_gold, true);
|
||||||
|
@ -331,6 +331,9 @@ private:
|
|||||||
Mat dst;
|
Mat dst;
|
||||||
int* x_ofs, pix_size4;
|
int* x_ofs, pix_size4;
|
||||||
double ify;
|
double ify;
|
||||||
|
|
||||||
|
resizeNNInvoker(const resizeNNInvoker&);
|
||||||
|
resizeNNInvoker& operator=(const resizeNNInvoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -1193,6 +1196,9 @@ private:
|
|||||||
const AT* alpha, *_beta;
|
const AT* alpha, *_beta;
|
||||||
const Size ssize, dsize;
|
const Size ssize, dsize;
|
||||||
const int ksize, xmin, xmax;
|
const int ksize, xmin, xmax;
|
||||||
|
|
||||||
|
resizeGeneric_Invoker(const resizeGeneric_Invoker&);
|
||||||
|
resizeGeneric_Invoker& operator=(const resizeGeneric_Invoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class HResize, class VResize>
|
template<class HResize, class VResize>
|
||||||
@ -1280,6 +1286,9 @@ private:
|
|||||||
const int cn;
|
const int cn;
|
||||||
bool fast_mode;
|
bool fast_mode;
|
||||||
const int step;
|
const int step;
|
||||||
|
|
||||||
|
ResizeAreaFast_2x2_8u(const ResizeAreaFast_2x2_8u&);
|
||||||
|
ResizeAreaFast_2x2_8u& operator=(const ResizeAreaFast_2x2_8u&);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, typename WT, typename VecOp>
|
template <typename T, typename WT, typename VecOp>
|
||||||
@ -1357,7 +1366,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
D[dx] = saturate_cast<WT>((float)sum/count);
|
D[dx] = saturate_cast<T>((float)sum/count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1367,6 +1376,9 @@ private:
|
|||||||
Mat dst;
|
Mat dst;
|
||||||
const int scale_x, scale_y;
|
const int scale_x, scale_y;
|
||||||
const int *ofs, *xofs;
|
const int *ofs, *xofs;
|
||||||
|
|
||||||
|
resizeAreaFast_Invoker(const resizeAreaFast_Invoker&);
|
||||||
|
resizeAreaFast_Invoker& operator=(const resizeAreaFast_Invoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, typename WT, typename VecOp>
|
template<typename T, typename WT, typename VecOp>
|
||||||
@ -1600,6 +1612,8 @@ private:
|
|||||||
#ifdef HAVE_TBB
|
#ifdef HAVE_TBB
|
||||||
const int *yofs, *cur_dy_ofs;
|
const int *yofs, *cur_dy_ofs;
|
||||||
#endif
|
#endif
|
||||||
|
resizeArea_Invoker(const resizeArea_Invoker&);
|
||||||
|
resizeArea_Invoker& operator=(const resizeArea_Invoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, typename WT>
|
template<typename T, typename WT>
|
||||||
@ -1617,7 +1631,7 @@ static void resizeArea_( const Mat& src, Mat& dst, const DecimateAlpha* xofs, in
|
|||||||
cur_dy_ofs[sy] = cur_dy;
|
cur_dy_ofs[sy] = cur_dy;
|
||||||
if( (cur_dy + 1)*scale_y_ <= sy + 1 || sy == ssize.height - 1 )
|
if( (cur_dy + 1)*scale_y_ <= sy + 1 || sy == ssize.height - 1 )
|
||||||
{
|
{
|
||||||
WT beta = std::max(sy + 1 - (cur_dy+1)*scale_y_, 0.);
|
WT beta = (WT)std::max(sy + 1 - (cur_dy+1)*scale_y_, 0.);
|
||||||
if( fabs(beta) < 1e-3 )
|
if( fabs(beta) < 1e-3 )
|
||||||
{
|
{
|
||||||
if(cur_dy >= dsize.height)
|
if(cur_dy >= dsize.height)
|
||||||
@ -2960,6 +2974,9 @@ private:
|
|||||||
RemapNNFunc nnfunc;
|
RemapNNFunc nnfunc;
|
||||||
RemapFunc ifunc;
|
RemapFunc ifunc;
|
||||||
const void *ctab;
|
const void *ctab;
|
||||||
|
|
||||||
|
remapInvoker(const remapInvoker&);
|
||||||
|
remapInvoker& operator=(const remapInvoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -3324,8 +3341,11 @@ private:
|
|||||||
const Scalar borderValue;
|
const Scalar borderValue;
|
||||||
int *adelta, *bdelta;
|
int *adelta, *bdelta;
|
||||||
double *M;
|
double *M;
|
||||||
|
|
||||||
|
warpAffineInvoker(const warpAffineInvoker&);
|
||||||
|
warpAffineInvoker& operator=(const warpAffineInvoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3477,6 +3497,8 @@ private:
|
|||||||
double* M;
|
double* M;
|
||||||
int interpolation, borderType;
|
int interpolation, borderType;
|
||||||
const Scalar borderValue;
|
const Scalar borderValue;
|
||||||
|
warpPerspectiveInvoker(const warpPerspectiveInvoker&);
|
||||||
|
warpPerspectiveInvoker& operator=(const warpPerspectiveInvoker&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -271,7 +271,7 @@ namespace
|
|||||||
for (int i = 0; i < nfeatures; ++i)
|
for (int i = 0; i < nfeatures; ++i)
|
||||||
weights[i] *= 1.0f - learningRate_;
|
weights[i] *= 1.0f - learningRate_;
|
||||||
|
|
||||||
bool inserted = insertFeature(newFeatureColor, learningRate_, colors, weights, nfeatures, maxFeatures_);
|
bool inserted = insertFeature(newFeatureColor, (float)learningRate_, colors, weights, nfeatures, maxFeatures_);
|
||||||
|
|
||||||
if (inserted)
|
if (inserted)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user