mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
brush up divSaturate of carotene
* use round-to-nearest rounding, the same as divSaturateQ
This commit is contained in:
parent
d18e45b442
commit
2a91453ef1
11
3rdparty/carotene/src/div.cpp
vendored
11
3rdparty/carotene/src/div.cpp
vendored
@ -74,6 +74,13 @@ template <>
|
|||||||
inline uint32x4_t divSaturateQ<uint32x4_t>(const uint32x4_t &v1, const uint32x4_t &v2, const float scale)
|
inline uint32x4_t divSaturateQ<uint32x4_t>(const uint32x4_t &v1, const uint32x4_t &v2, const float scale)
|
||||||
{ return vcvtq_u32_f32(vroundq(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2))))); }
|
{ return vcvtq_u32_f32(vroundq(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2))))); }
|
||||||
|
|
||||||
|
// Pre-conversion rounding bias for a 2-lane float vector.
// Returns v with a sign-matched 0.5 added to every lane (v + 0.5 where the lane's
// sign bit is clear, v - 0.5 where it is set). Despite the name, the returned lanes
// are not themselves rounded to integer values; the divSaturate specializations
// shown in this diff feed the result straight into vcvt_s32_f32 / vcvt_u32_f32.
// NOTE(review): this yields round-half-away-from-zero only if those conversions
// truncate toward zero -- confirm against the NEON intrinsics reference.
inline float32x2_t vround(const float32x2_t& v)
|
||||||
|
{
|
||||||
|
// signMask has only bit 31 set in each lane; half holds the raw bit pattern of 0.5f.
// NOTE(review): 1 << 31 shifts into the sign bit of a signed int, which the language
// only fully defines from C++20 on -- confirm this is acceptable for the targeted compilers.
const int32x2_t signMask = vdup_n_s32(1 << 31), half = vreinterpret_s32_f32(vdup_n_f32(0.5f));
|
||||||
|
// Copy each lane's sign bit from v onto the 0.5f pattern, giving +0.5f or -0.5f per lane.
float32x2_t v_addition = vreinterpret_f32_s32(vorr_s32(half, vand_s32(signMask, vreinterpret_s32_f32(v))));
|
||||||
|
return vadd_f32(v, v_addition);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline T divSaturate(const T &v1, const T &v2, const float scale)
|
inline T divSaturate(const T &v1, const T &v2, const float scale)
|
||||||
{
|
{
|
||||||
@ -81,10 +88,10 @@ inline T divSaturate(const T &v1, const T &v2, const float scale)
|
|||||||
}
|
}
|
||||||
// divSaturate specialization for int32x2_t, rendered as a pair of diff rows: each
// line appears twice and only the body line differs between the two renderings.
// Both bodies convert v1 and v2 to float lanes, multiply v1 by scale and then by
// internal::vrecp_f32(v2) (presumably a per-lane reciprocal of v2 -- the helper is
// not visible here), and convert the product back with vcvt_s32_f32.
// The second body additionally passes the product through vround() before the
// conversion, matching the commit description's switch to rounding to nearest.
template <>
|
template <>
|
||||||
inline int32x2_t divSaturate<int32x2_t>(const int32x2_t &v1, const int32x2_t &v2, const float scale)
|
inline int32x2_t divSaturate<int32x2_t>(const int32x2_t &v1, const int32x2_t &v2, const float scale)
|
||||||
{ return vcvt_s32_f32(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2)))); }
|
{ return vcvt_s32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2))))); }
|
||||||
// divSaturate specialization for uint32x2_t, rendered as a pair of diff rows: each
// line appears twice and only the body line differs between the two renderings.
// Both bodies convert v1 and v2 to float lanes, multiply v1 by scale and then by
// internal::vrecp_f32(v2) (presumably a per-lane reciprocal of v2 -- the helper is
// not visible here), and convert the product back with vcvt_u32_f32.
// The second body additionally passes the product through vround() before the
// conversion, matching the commit description's switch to rounding to nearest.
template <>
|
template <>
|
||||||
inline uint32x2_t divSaturate<uint32x2_t>(const uint32x2_t &v1, const uint32x2_t &v2, const float scale)
|
inline uint32x2_t divSaturate<uint32x2_t>(const uint32x2_t &v1, const uint32x2_t &v2, const float scale)
|
||||||
{ return vcvt_u32_f32(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2)))); }
|
{ return vcvt_u32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2))))); }
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
Loading…
Reference in New Issue
Block a user