opencv/3rdparty/carotene/src/vround_helper.hpp
Kumataro dba7186378
Merge pull request #24271 from Kumataro:fix24163
Fix to convert float32 to int32/uint32 with rounding to nearest (ties to even). #24271

Fix https://github.com/opencv/opencv/issues/24163

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake

(carotene is BSD)
2023-12-25 12:17:17 +03:00

103 lines
3.6 KiB
C++

/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3-clause BSD License)
*
* Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
* Third party copyrights are property of their respective owners.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort (including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#ifndef CAROTENE_SRC_VROUND_HELPER_HPP
#define CAROTENE_SRC_VROUND_HELPER_HPP
#include "common.hpp"
#include "vtransform.hpp"
#ifdef CAROTENE_NEON
/**
* This helper header is for rounding from float32xN to uin32xN or int32xN to nearest, ties to even.
* See https://en.wikipedia.org/wiki/Rounding#Rounding_half_to_even
*/
// See https://github.com/opencv/opencv/pull/24271#issuecomment-1867318007
#define CAROTENE_ROUND_DELTA (12582912.0f)
namespace CAROTENE_NS { namespace internal {
inline uint32x4_t vroundq_u32_f32(const float32x4_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
return vcvtnq_u32_f32(val);
#else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
return vcvtq_u32_f32(vsubq_f32(vaddq_f32(val, delta), delta));
#endif
}
inline uint32x2_t vround_u32_f32(const float32x2_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
return vcvtn_u32_f32(val);
#else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
return vcvt_u32_f32(vsub_f32(vadd_f32(val, delta), delta));
#endif
}
inline int32x4_t vroundq_s32_f32(const float32x4_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
return vcvtnq_s32_f32(val);
#else
const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA);
return vcvtq_s32_f32(vsubq_f32(vaddq_f32(val, delta), delta));
#endif
}
inline int32x2_t vround_s32_f32(const float32x2_t val)
{
#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
return vcvtn_s32_f32(val);
#else
const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA);
return vcvt_s32_f32(vsub_f32(vadd_f32(val, delta), delta));
#endif
}
} }
#endif // CAROTENE_NEON
#endif