mirror of
https://github.com/opencv/opencv.git
synced 2024-11-26 12:10:49 +08:00
used new device layer for cv::gpu::divide
This commit is contained in:
parent
e7dba695b3
commit
574ff47146
@ -40,191 +40,203 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#ifndef HAVE_OPENCV_CUDEV
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
#error "opencv_cudev is required"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
#else
|
||||
|
||||
namespace arithm
|
||||
#include "opencv2/cudev.hpp"
|
||||
|
||||
using namespace cv::cudev;
|
||||
|
||||
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
|
||||
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Div_8uc4_32f : binary_function<uint, float, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, float b) const
|
||||
{
|
||||
uint res = 0;
|
||||
|
||||
if (b != 0)
|
||||
{
|
||||
b = 1.0f / b;
|
||||
res |= (saturate_cast<uchar>((0xffu & (a )) * b) );
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 8)) * b) << 8);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
struct Div_16sc4_32f : binary_function<short4, float, short4>
|
||||
{
|
||||
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
|
||||
{
|
||||
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
|
||||
saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
|
||||
: make_short4(0,0,0,0);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct Div : binary_function<T, T, D>
|
||||
template <typename T, typename D> struct DivOp : binary_function<T, T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? saturate_cast<D>(a / b) : 0;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
template <typename T> struct Div<T, float> : binary_function<T, T, float>
|
||||
template <typename T> struct DivOp<T, float> : binary_function<T, T, float>
|
||||
{
|
||||
__device__ __forceinline__ float operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? static_cast<float>(a) / b : 0;
|
||||
return b != 0 ? static_cast<float>(a) / b : 0.0f;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
template <typename T> struct Div<T, double> : binary_function<T, T, double>
|
||||
template <typename T> struct DivOp<T, double> : binary_function<T, T, double>
|
||||
{
|
||||
__device__ __forceinline__ double operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? static_cast<double>(a) / b : 0;
|
||||
return b != 0 ? static_cast<double>(a) / b : 0.0;
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
|
||||
template <typename T, typename S, typename D> struct DivScaleOp : binary_function<T, T, D>
|
||||
{
|
||||
S scale;
|
||||
|
||||
__host__ explicit DivScale(S scale_) : scale(scale_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <> struct TransformPolicy<double> : DefaultTransformPolicy
|
||||
{
|
||||
enum {
|
||||
shift = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
|
||||
{
|
||||
device::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D>
|
||||
void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
|
||||
void divMatImpl(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream& stream)
|
||||
{
|
||||
if (scale == 1)
|
||||
{
|
||||
Div<T, D> op;
|
||||
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
DivOp<T, D> op;
|
||||
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
DivScale<T, S, D> op(static_cast<S>(scale));
|
||||
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
DivScaleOp<T, S, D> op;
|
||||
op.scale = static_cast<S>(scale);
|
||||
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op, stream);
|
||||
}
|
||||
}
|
||||
|
||||
template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
||||
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream& stream);
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{
|
||||
divMatImpl<uchar, float, uchar>,
|
||||
divMatImpl<uchar, float, schar>,
|
||||
divMatImpl<uchar, float, ushort>,
|
||||
divMatImpl<uchar, float, short>,
|
||||
divMatImpl<uchar, float, int>,
|
||||
divMatImpl<uchar, float, float>,
|
||||
divMatImpl<uchar, double, double>
|
||||
},
|
||||
{
|
||||
divMatImpl<schar, float, uchar>,
|
||||
divMatImpl<schar, float, schar>,
|
||||
divMatImpl<schar, float, ushort>,
|
||||
divMatImpl<schar, float, short>,
|
||||
divMatImpl<schar, float, int>,
|
||||
divMatImpl<schar, float, float>,
|
||||
divMatImpl<schar, double, double>
|
||||
},
|
||||
{
|
||||
0 /*divMatImpl<ushort, float, uchar>*/,
|
||||
0 /*divMatImpl<ushort, float, schar>*/,
|
||||
divMatImpl<ushort, float, ushort>,
|
||||
divMatImpl<ushort, float, short>,
|
||||
divMatImpl<ushort, float, int>,
|
||||
divMatImpl<ushort, float, float>,
|
||||
divMatImpl<ushort, double, double>
|
||||
},
|
||||
{
|
||||
0 /*divMatImpl<short, float, uchar>*/,
|
||||
0 /*divMatImpl<short, float, schar>*/,
|
||||
divMatImpl<short, float, ushort>,
|
||||
divMatImpl<short, float, short>,
|
||||
divMatImpl<short, float, int>,
|
||||
divMatImpl<short, float, float>,
|
||||
divMatImpl<short, double, double>
|
||||
},
|
||||
{
|
||||
0 /*divMatImpl<int, float, uchar>*/,
|
||||
0 /*divMatImpl<int, float, schar>*/,
|
||||
0 /*divMatImpl<int, float, ushort>*/,
|
||||
0 /*divMatImpl<int, float, short>*/,
|
||||
divMatImpl<int, float, int>,
|
||||
divMatImpl<int, float, float>,
|
||||
divMatImpl<int, double, double>
|
||||
},
|
||||
{
|
||||
0 /*divMatImpl<float, float, uchar>*/,
|
||||
0 /*divMatImpl<float, float, schar>*/,
|
||||
0 /*divMatImpl<float, float, ushort>*/,
|
||||
0 /*divMatImpl<float, float, short>*/,
|
||||
0 /*divMatImpl<float, float, int>*/,
|
||||
divMatImpl<float, float, float>,
|
||||
divMatImpl<float, double, double>
|
||||
},
|
||||
{
|
||||
0 /*divMatImpl<double, double, uchar>*/,
|
||||
0 /*divMatImpl<double, double, schar>*/,
|
||||
0 /*divMatImpl<double, double, ushort>*/,
|
||||
0 /*divMatImpl<double, double, short>*/,
|
||||
0 /*divMatImpl<double, double, int>*/,
|
||||
0 /*divMatImpl<double, double, float>*/,
|
||||
divMatImpl<double, double, double>
|
||||
}
|
||||
};
|
||||
|
||||
const int sdepth = src1.depth();
|
||||
const int ddepth = dst.depth();
|
||||
|
||||
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
|
||||
|
||||
GpuMat src1_ = src1.reshape(1);
|
||||
GpuMat src2_ = src2.reshape(1);
|
||||
GpuMat dst_ = dst.reshape(1);
|
||||
|
||||
const func_t func = funcs[sdepth][ddepth];
|
||||
|
||||
if (!func)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src1_, src2_, dst_, scale, stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
struct DivOpSpecial : binary_function<T, float, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(const T& a, float b) const
|
||||
{
|
||||
typedef typename VecTraits<T>::elem_type elem_type;
|
||||
|
||||
T res = VecTraits<T>::all(0);
|
||||
|
||||
if (b != 0)
|
||||
{
|
||||
b = 1.0f / b;
|
||||
res.x = saturate_cast<elem_type>(a.x * b);
|
||||
res.y = saturate_cast<elem_type>(a.y * b);
|
||||
res.z = saturate_cast<elem_type>(a.z * b);
|
||||
res.w = saturate_cast<elem_type>(a.w * b);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
gridTransformBinary(globPtr<uchar4>(src1), globPtr<float>(src2), globPtr<uchar4>(dst), DivOpSpecial<uchar4>(), stream);
|
||||
}
|
||||
|
||||
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
gridTransformBinary(globPtr<short4>(src1), globPtr<float>(src2), globPtr<short4>(dst), DivOpSpecial<short4>(), stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -40,129 +40,225 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#ifndef HAVE_OPENCV_CUDEV
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
#error "opencv_cudev is required"
|
||||
|
||||
using namespace cv::cuda;
|
||||
using namespace cv::cuda::device;
|
||||
#else
|
||||
|
||||
namespace arithm
|
||||
#include "opencv2/cudev.hpp"
|
||||
|
||||
using namespace cv::cudev;
|
||||
|
||||
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, typename S, typename D> struct DivScalar : unary_function<T, D>
|
||||
template <int cn> struct SafeDiv;
|
||||
template <> struct SafeDiv<1>
|
||||
{
|
||||
S val;
|
||||
|
||||
__host__ explicit DivScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
template <typename T>
|
||||
__device__ __forceinline__ static T op(T a, T b)
|
||||
{
|
||||
return saturate_cast<D>(a / val);
|
||||
return b != 0 ? a / b : 0;
|
||||
}
|
||||
};
|
||||
template <> struct SafeDiv<2>
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ static T op(const T& a, const T& b)
|
||||
{
|
||||
T res;
|
||||
|
||||
res.x = b.x != 0 ? a.x / b.x : 0;
|
||||
res.y = b.y != 0 ? a.y / b.y : 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
template <> struct SafeDiv<3>
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ static T op(const T& a, const T& b)
|
||||
{
|
||||
T res;
|
||||
|
||||
res.x = b.x != 0 ? a.x / b.x : 0;
|
||||
res.y = b.y != 0 ? a.y / b.y : 0;
|
||||
res.z = b.z != 0 ? a.z / b.z : 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
template <> struct SafeDiv<4>
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ static T op(const T& a, const T& b)
|
||||
{
|
||||
T res;
|
||||
|
||||
res.x = b.x != 0 ? a.x / b.x : 0;
|
||||
res.y = b.y != 0 ? a.y / b.y : 0;
|
||||
res.z = b.z != 0 ? a.z / b.z : 0;
|
||||
res.w = b.w != 0 ? a.w / b.w : 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct DivScalarInv : unary_function<T, D>
|
||||
template <typename SrcType, typename ScalarType, typename DstType> struct DivScalarOp : unary_function<SrcType, DstType>
|
||||
{
|
||||
S val;
|
||||
ScalarType val;
|
||||
|
||||
explicit DivScalarInv(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
__device__ __forceinline__ DstType operator ()(SrcType a) const
|
||||
{
|
||||
return a != 0 ? saturate_cast<D>(val / a) : 0;
|
||||
return saturate_cast<DstType>(SafeDiv<VecTraits<ScalarType>::cn>::op(saturate_cast<ScalarType>(a), val));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename SrcType, typename ScalarType, typename DstType> struct DivScalarOpInv : unary_function<SrcType, DstType>
|
||||
{
|
||||
ScalarType val;
|
||||
|
||||
__device__ __forceinline__ DstType operator ()(SrcType a) const
|
||||
{
|
||||
return saturate_cast<DstType>(SafeDiv<VecTraits<ScalarType>::cn>::op(val, saturate_cast<ScalarType>(a)));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalarInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream)
|
||||
template <> struct TransformPolicy<double> : DefaultTransformPolicy
|
||||
{
|
||||
enum {
|
||||
shift = 1
|
||||
};
|
||||
};
|
||||
|
||||
template <typename SrcType, typename ScalarDepth, typename DstType>
|
||||
void divScalarImpl(const GpuMat& src, cv::Scalar value, bool inv, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
|
||||
|
||||
cv::Scalar_<ScalarDepth> value_ = value;
|
||||
|
||||
if (inv)
|
||||
{
|
||||
DivScalarInv<T, S, D> op(static_cast<S>(val));
|
||||
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
DivScalarOpInv<SrcType, ScalarType, DstType> op;
|
||||
op.val = VecTraits<ScalarType>::make(value_.val);
|
||||
|
||||
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
DivScalar<T, S, D> op(static_cast<S>(val));
|
||||
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
DivScalarOp<SrcType, ScalarType, DstType> op;
|
||||
op.val = VecTraits<ScalarType>::make(value_.val);
|
||||
|
||||
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
|
||||
}
|
||||
}
|
||||
|
||||
template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<short, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<float, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<float, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<double, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
||||
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, Stream& stream);
|
||||
static const func_t funcs[7][7][4] =
|
||||
{
|
||||
{
|
||||
{divScalarImpl<uchar, float, uchar>, divScalarImpl<uchar2, float, uchar2>, divScalarImpl<uchar3, float, uchar3>, divScalarImpl<uchar4, float, uchar4>},
|
||||
{divScalarImpl<uchar, float, schar>, divScalarImpl<uchar2, float, char2>, divScalarImpl<uchar3, float, char3>, divScalarImpl<uchar4, float, char4>},
|
||||
{divScalarImpl<uchar, float, ushort>, divScalarImpl<uchar2, float, ushort2>, divScalarImpl<uchar3, float, ushort3>, divScalarImpl<uchar4, float, ushort4>},
|
||||
{divScalarImpl<uchar, float, short>, divScalarImpl<uchar2, float, short2>, divScalarImpl<uchar3, float, short3>, divScalarImpl<uchar4, float, short4>},
|
||||
{divScalarImpl<uchar, float, int>, divScalarImpl<uchar2, float, int2>, divScalarImpl<uchar3, float, int3>, divScalarImpl<uchar4, float, int4>},
|
||||
{divScalarImpl<uchar, float, float>, divScalarImpl<uchar2, float, float2>, divScalarImpl<uchar3, float, float3>, divScalarImpl<uchar4, float, float4>},
|
||||
{divScalarImpl<uchar, double, double>, divScalarImpl<uchar2, double, double2>, divScalarImpl<uchar3, double, double3>, divScalarImpl<uchar4, double, double4>}
|
||||
},
|
||||
{
|
||||
{divScalarImpl<schar, float, uchar>, divScalarImpl<char2, float, uchar2>, divScalarImpl<char3, float, uchar3>, divScalarImpl<char4, float, uchar4>},
|
||||
{divScalarImpl<schar, float, schar>, divScalarImpl<char2, float, char2>, divScalarImpl<char3, float, char3>, divScalarImpl<char4, float, char4>},
|
||||
{divScalarImpl<schar, float, ushort>, divScalarImpl<char2, float, ushort2>, divScalarImpl<char3, float, ushort3>, divScalarImpl<char4, float, ushort4>},
|
||||
{divScalarImpl<schar, float, short>, divScalarImpl<char2, float, short2>, divScalarImpl<char3, float, short3>, divScalarImpl<char4, float, short4>},
|
||||
{divScalarImpl<schar, float, int>, divScalarImpl<char2, float, int2>, divScalarImpl<char3, float, int3>, divScalarImpl<char4, float, int4>},
|
||||
{divScalarImpl<schar, float, float>, divScalarImpl<char2, float, float2>, divScalarImpl<char3, float, float3>, divScalarImpl<char4, float, float4>},
|
||||
{divScalarImpl<schar, double, double>, divScalarImpl<char2, double, double2>, divScalarImpl<char3, double, double3>, divScalarImpl<char4, double, double4>}
|
||||
},
|
||||
{
|
||||
{0 /*divScalarImpl<ushort, float, uchar>*/, 0 /*divScalarImpl<ushort2, float, uchar2>*/, 0 /*divScalarImpl<ushort3, float, uchar3>*/, 0 /*divScalarImpl<ushort4, float, uchar4>*/},
|
||||
{0 /*divScalarImpl<ushort, float, schar>*/, 0 /*divScalarImpl<ushort2, float, char2>*/, 0 /*divScalarImpl<ushort3, float, char3>*/, 0 /*divScalarImpl<ushort4, float, char4>*/},
|
||||
{divScalarImpl<ushort, float, ushort>, divScalarImpl<ushort2, float, ushort2>, divScalarImpl<ushort3, float, ushort3>, divScalarImpl<ushort4, float, ushort4>},
|
||||
{divScalarImpl<ushort, float, short>, divScalarImpl<ushort2, float, short2>, divScalarImpl<ushort3, float, short3>, divScalarImpl<ushort4, float, short4>},
|
||||
{divScalarImpl<ushort, float, int>, divScalarImpl<ushort2, float, int2>, divScalarImpl<ushort3, float, int3>, divScalarImpl<ushort4, float, int4>},
|
||||
{divScalarImpl<ushort, float, float>, divScalarImpl<ushort2, float, float2>, divScalarImpl<ushort3, float, float3>, divScalarImpl<ushort4, float, float4>},
|
||||
{divScalarImpl<ushort, double, double>, divScalarImpl<ushort2, double, double2>, divScalarImpl<ushort3, double, double3>, divScalarImpl<ushort4, double, double4>}
|
||||
},
|
||||
{
|
||||
{0 /*divScalarImpl<short, float, uchar>*/, 0 /*divScalarImpl<short2, float, uchar2>*/, 0 /*divScalarImpl<short3, float, uchar3>*/, 0 /*divScalarImpl<short4, float, uchar4>*/},
|
||||
{0 /*divScalarImpl<short, float, schar>*/, 0 /*divScalarImpl<short2, float, char2>*/, 0 /*divScalarImpl<short3, float, char3>*/, 0 /*divScalarImpl<short4, float, char4>*/},
|
||||
{divScalarImpl<short, float, ushort>, divScalarImpl<short2, float, ushort2>, divScalarImpl<short3, float, ushort3>, divScalarImpl<short4, float, ushort4>},
|
||||
{divScalarImpl<short, float, short>, divScalarImpl<short2, float, short2>, divScalarImpl<short3, float, short3>, divScalarImpl<short4, float, short4>},
|
||||
{divScalarImpl<short, float, int>, divScalarImpl<short2, float, int2>, divScalarImpl<short3, float, int3>, divScalarImpl<short4, float, int4>},
|
||||
{divScalarImpl<short, float, float>, divScalarImpl<short2, float, float2>, divScalarImpl<short3, float, float3>, divScalarImpl<short4, float, float4>},
|
||||
{divScalarImpl<short, double, double>, divScalarImpl<short2, double, double2>, divScalarImpl<short3, double, double3>, divScalarImpl<short4, double, double4>}
|
||||
},
|
||||
{
|
||||
{0 /*divScalarImpl<int, float, uchar>*/, 0 /*divScalarImpl<int2, float, uchar2>*/, 0 /*divScalarImpl<int3, float, uchar3>*/, 0 /*divScalarImpl<int4, float, uchar4>*/},
|
||||
{0 /*divScalarImpl<int, float, schar>*/, 0 /*divScalarImpl<int2, float, char2>*/, 0 /*divScalarImpl<int3, float, char3>*/, 0 /*divScalarImpl<int4, float, char4>*/},
|
||||
{0 /*divScalarImpl<int, float, ushort>*/, 0 /*divScalarImpl<int2, float, ushort2>*/, 0 /*divScalarImpl<int3, float, ushort3>*/, 0 /*divScalarImpl<int4, float, ushort4>*/},
|
||||
{0 /*divScalarImpl<int, float, short>*/, 0 /*divScalarImpl<int2, float, short2>*/, 0 /*divScalarImpl<int3, float, short3>*/, 0 /*divScalarImpl<int4, float, short4>*/},
|
||||
{divScalarImpl<int, float, int>, divScalarImpl<int2, float, int2>, divScalarImpl<int3, float, int3>, divScalarImpl<int4, float, int4>},
|
||||
{divScalarImpl<int, float, float>, divScalarImpl<int2, float, float2>, divScalarImpl<int3, float, float3>, divScalarImpl<int4, float, float4>},
|
||||
{divScalarImpl<int, double, double>, divScalarImpl<int2, double, double2>, divScalarImpl<int3, double, double3>, divScalarImpl<int4, double, double4>}
|
||||
},
|
||||
{
|
||||
{0 /*divScalarImpl<float, float, uchar>*/, 0 /*divScalarImpl<float2, float, uchar2>*/, 0 /*divScalarImpl<float3, float, uchar3>*/, 0 /*divScalarImpl<float4, float, uchar4>*/},
|
||||
{0 /*divScalarImpl<float, float, schar>*/, 0 /*divScalarImpl<float2, float, char2>*/, 0 /*divScalarImpl<float3, float, char3>*/, 0 /*divScalarImpl<float4, float, char4>*/},
|
||||
{0 /*divScalarImpl<float, float, ushort>*/, 0 /*divScalarImpl<float2, float, ushort2>*/, 0 /*divScalarImpl<float3, float, ushort3>*/, 0 /*divScalarImpl<float4, float, ushort4>*/},
|
||||
{0 /*divScalarImpl<float, float, short>*/, 0 /*divScalarImpl<float2, float, short2>*/, 0 /*divScalarImpl<float3, float, short3>*/, 0 /*divScalarImpl<float4, float, short4>*/},
|
||||
{0 /*divScalarImpl<float, float, int>*/, 0 /*divScalarImpl<float2, float, int2>*/, 0 /*divScalarImpl<float3, float, int3>*/, 0 /*divScalarImpl<float4, float, int4>*/},
|
||||
{divScalarImpl<float, float, float>, divScalarImpl<float2, float, float2>, divScalarImpl<float3, float, float3>, divScalarImpl<float4, float, float4>},
|
||||
{divScalarImpl<float, double, double>, divScalarImpl<float2, double, double2>, divScalarImpl<float3, double, double3>, divScalarImpl<float4, double, double4>}
|
||||
},
|
||||
{
|
||||
{0 /*divScalarImpl<double, double, uchar>*/, 0 /*divScalarImpl<double2, double, uchar2>*/, 0 /*divScalarImpl<double3, double, uchar3>*/, 0 /*divScalarImpl<double4, double, uchar4>*/},
|
||||
{0 /*divScalarImpl<double, double, schar>*/, 0 /*divScalarImpl<double2, double, char2>*/, 0 /*divScalarImpl<double3, double, char3>*/, 0 /*divScalarImpl<double4, double, char4>*/},
|
||||
{0 /*divScalarImpl<double, double, ushort>*/, 0 /*divScalarImpl<double2, double, ushort2>*/, 0 /*divScalarImpl<double3, double, ushort3>*/, 0 /*divScalarImpl<double4, double, ushort4>*/},
|
||||
{0 /*divScalarImpl<double, double, short>*/, 0 /*divScalarImpl<double2, double, short2>*/, 0 /*divScalarImpl<double3, double, short3>*/, 0 /*divScalarImpl<double4, double, short4>*/},
|
||||
{0 /*divScalarImpl<double, double, int>*/, 0 /*divScalarImpl<double2, double, int2>*/, 0 /*divScalarImpl<double3, double, int3>*/, 0 /*divScalarImpl<double4, double, int4>*/},
|
||||
{0 /*divScalarImpl<double, double, float>*/, 0 /*divScalarImpl<double2, double, float2>*/, 0 /*divScalarImpl<double3, double, float3>*/, 0 /*divScalarImpl<double4, double, float4>*/},
|
||||
{divScalarImpl<double, double, double>, divScalarImpl<double2, double, double2>, divScalarImpl<double3, double, double3>, divScalarImpl<double4, double, double4>}
|
||||
}
|
||||
};
|
||||
|
||||
const int sdepth = src.depth();
|
||||
const int ddepth = dst.depth();
|
||||
const int cn = src.channels();
|
||||
|
||||
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
|
||||
|
||||
if (inv)
|
||||
{
|
||||
val[0] *= scale;
|
||||
val[1] *= scale;
|
||||
val[2] *= scale;
|
||||
val[3] *= scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
val[0] /= scale;
|
||||
val[1] /= scale;
|
||||
val[2] /= scale;
|
||||
val[3] /= scale;
|
||||
}
|
||||
|
||||
const func_t func = funcs[sdepth][ddepth][cn - 1];
|
||||
|
||||
if (!func)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src, val, inv, dst, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -401,229 +401,11 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// divide
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void divMat_8uc4_32f(PtrStepSz<unsigned int> src1, PtrStepSzf src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
|
||||
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
|
||||
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
|
||||
|
||||
void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename S, typename D>
|
||||
void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
}
|
||||
|
||||
static void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{
|
||||
arithm::divMat<unsigned char, float, unsigned char>,
|
||||
arithm::divMat<unsigned char, float, signed char>,
|
||||
arithm::divMat<unsigned char, float, unsigned short>,
|
||||
arithm::divMat<unsigned char, float, short>,
|
||||
arithm::divMat<unsigned char, float, int>,
|
||||
arithm::divMat<unsigned char, float, float>,
|
||||
arithm::divMat<unsigned char, double, double>
|
||||
},
|
||||
{
|
||||
arithm::divMat<signed char, float, unsigned char>,
|
||||
arithm::divMat<signed char, float, signed char>,
|
||||
arithm::divMat<signed char, float, unsigned short>,
|
||||
arithm::divMat<signed char, float, short>,
|
||||
arithm::divMat<signed char, float, int>,
|
||||
arithm::divMat<signed char, float, float>,
|
||||
arithm::divMat<signed char, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divMat<unsigned short, float, unsigned char>*/,
|
||||
0 /*arithm::divMat<unsigned short, float, signed char>*/,
|
||||
arithm::divMat<unsigned short, float, unsigned short>,
|
||||
arithm::divMat<unsigned short, float, short>,
|
||||
arithm::divMat<unsigned short, float, int>,
|
||||
arithm::divMat<unsigned short, float, float>,
|
||||
arithm::divMat<unsigned short, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divMat<short, float, unsigned char>*/,
|
||||
0 /*arithm::divMat<short, float, signed char>*/,
|
||||
arithm::divMat<short, float, unsigned short>,
|
||||
arithm::divMat<short, float, short>,
|
||||
arithm::divMat<short, float, int>,
|
||||
arithm::divMat<short, float, float>,
|
||||
arithm::divMat<short, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divMat<int, float, unsigned char>*/,
|
||||
0 /*arithm::divMat<int, float, signed char>*/,
|
||||
0 /*arithm::divMat<int, float, unsigned short>*/,
|
||||
0 /*arithm::divMat<int, float, short>*/,
|
||||
arithm::divMat<int, float, int>,
|
||||
arithm::divMat<int, float, float>,
|
||||
arithm::divMat<int, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divMat<float, float, unsigned char>*/,
|
||||
0 /*arithm::divMat<float, float, signed char>*/,
|
||||
0 /*arithm::divMat<float, float, unsigned short>*/,
|
||||
0 /*arithm::divMat<float, float, short>*/,
|
||||
0 /*arithm::divMat<float, float, int>*/,
|
||||
arithm::divMat<float, float, float>,
|
||||
arithm::divMat<float, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divMat<double, double, unsigned char>*/,
|
||||
0 /*arithm::divMat<double, double, signed char>*/,
|
||||
0 /*arithm::divMat<double, double, unsigned short>*/,
|
||||
0 /*arithm::divMat<double, double, short>*/,
|
||||
0 /*arithm::divMat<double, double, int>*/,
|
||||
0 /*arithm::divMat<double, double, float>*/,
|
||||
arithm::divMat<double, double, double>
|
||||
}
|
||||
};
|
||||
|
||||
const int sdepth = src1.depth();
|
||||
const int ddepth = dst.depth();
|
||||
const int cn = src1.channels();
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
|
||||
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
|
||||
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
|
||||
|
||||
const func_t func = funcs[sdepth][ddepth];
|
||||
|
||||
if (!func)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src1_, src2_, dst_, scale, stream);
|
||||
}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{
|
||||
arithm::divScalar<unsigned char, float, unsigned char>,
|
||||
arithm::divScalar<unsigned char, float, signed char>,
|
||||
arithm::divScalar<unsigned char, float, unsigned short>,
|
||||
arithm::divScalar<unsigned char, float, short>,
|
||||
arithm::divScalar<unsigned char, float, int>,
|
||||
arithm::divScalar<unsigned char, float, float>,
|
||||
arithm::divScalar<unsigned char, double, double>
|
||||
},
|
||||
{
|
||||
arithm::divScalar<signed char, float, unsigned char>,
|
||||
arithm::divScalar<signed char, float, signed char>,
|
||||
arithm::divScalar<signed char, float, unsigned short>,
|
||||
arithm::divScalar<signed char, float, short>,
|
||||
arithm::divScalar<signed char, float, int>,
|
||||
arithm::divScalar<signed char, float, float>,
|
||||
arithm::divScalar<signed char, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divScalar<unsigned short, float, unsigned char>*/,
|
||||
0 /*arithm::divScalar<unsigned short, float, signed char>*/,
|
||||
arithm::divScalar<unsigned short, float, unsigned short>,
|
||||
arithm::divScalar<unsigned short, float, short>,
|
||||
arithm::divScalar<unsigned short, float, int>,
|
||||
arithm::divScalar<unsigned short, float, float>,
|
||||
arithm::divScalar<unsigned short, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divScalar<short, float, unsigned char>*/,
|
||||
0 /*arithm::divScalar<short, float, signed char>*/,
|
||||
arithm::divScalar<short, float, unsigned short>,
|
||||
arithm::divScalar<short, float, short>,
|
||||
arithm::divScalar<short, float, int>,
|
||||
arithm::divScalar<short, float, float>,
|
||||
arithm::divScalar<short, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divScalar<int, float, unsigned char>*/,
|
||||
0 /*arithm::divScalar<int, float, signed char>*/,
|
||||
0 /*arithm::divScalar<int, float, unsigned short>*/,
|
||||
0 /*arithm::divScalar<int, float, short>*/,
|
||||
arithm::divScalar<int, float, int>,
|
||||
arithm::divScalar<int, float, float>,
|
||||
arithm::divScalar<int, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divScalar<float, float, unsigned char>*/,
|
||||
0 /*arithm::divScalar<float, float, signed char>*/,
|
||||
0 /*arithm::divScalar<float, float, unsigned short>*/,
|
||||
0 /*arithm::divScalar<float, float, short>*/,
|
||||
0 /*arithm::divScalar<float, float, int>*/,
|
||||
arithm::divScalar<float, float, float>,
|
||||
arithm::divScalar<float, double, double>
|
||||
},
|
||||
{
|
||||
0 /*arithm::divScalar<double, double, unsigned char>*/,
|
||||
0 /*arithm::divScalar<double, double, signed char>*/,
|
||||
0 /*arithm::divScalar<double, double, unsigned short>*/,
|
||||
0 /*arithm::divScalar<double, double, short>*/,
|
||||
0 /*arithm::divScalar<double, double, int>*/,
|
||||
0 /*arithm::divScalar<double, double, float>*/,
|
||||
arithm::divScalar<double, double, double>
|
||||
}
|
||||
};
|
||||
|
||||
typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
{
|
||||
{NppArithmScalar<CV_8U , 1, nppiDivC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiDivC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiDivC_8u_C4RSfs >::call},
|
||||
{0 , 0, 0 , 0 },
|
||||
{NppArithmScalar<CV_16U, 1, nppiDivC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiDivC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiDivC_16u_C4RSfs>::call},
|
||||
{NppArithmScalar<CV_16S, 1, nppiDivC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiDivC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiDivC_16s_C4RSfs>::call},
|
||||
{NppArithmScalar<CV_32S, 1, nppiDivC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiDivC_32s_C3RSfs>::call, 0 },
|
||||
{NppArithmScalar<CV_32F, 1, nppiDivC_32f_C1R >::call, 0, NppArithmScalar<CV_32F, 3, nppiDivC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiDivC_32f_C4R >::call},
|
||||
{0 , 0, 0 , 0 }
|
||||
};
|
||||
|
||||
const int sdepth = src.depth();
|
||||
const int ddepth = dst.depth();
|
||||
const int cn = src.channels();
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
||||
|
||||
if (inv)
|
||||
{
|
||||
val[0] *= scale;
|
||||
val[1] *= scale;
|
||||
val[2] *= scale;
|
||||
val[3] *= scale;
|
||||
}
|
||||
else
|
||||
{
|
||||
val[0] /= scale;
|
||||
val[1] /= scale;
|
||||
val[2] /= scale;
|
||||
val[3] /= scale;
|
||||
}
|
||||
|
||||
const npp_func_t npp_func = npp_funcs[sdepth][cn - 1];
|
||||
if (ddepth == sdepth && cn > 1 && npp_func != 0 && !inv)
|
||||
{
|
||||
npp_func(src, val, dst, stream);
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Assert( cn == 1 );
|
||||
|
||||
const func_t func = funcs[sdepth][ddepth];
|
||||
|
||||
if (!func)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
||||
|
||||
func(src, val[0], inv, dst, stream);
|
||||
}
|
||||
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
|
||||
|
||||
void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream)
|
||||
{
|
||||
@ -637,7 +419,7 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
arithm::divMat_8uc4_32f(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
divMat_8uc4_32f(src1, src2, dst, stream);
|
||||
}
|
||||
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
|
||||
{
|
||||
@ -649,7 +431,7 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
|
||||
_dst.create(src1.size(), src1.type());
|
||||
GpuMat dst = _dst.getGpuMat();
|
||||
|
||||
arithm::divMat_16sc4_32f(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
divMat_16sc4_32f(src1, src2, dst, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user