mirror of
https://github.com/opencv/opencv.git
synced 2024-12-19 03:58:31 +08:00
113 lines
3.7 KiB
C++
113 lines
3.7 KiB
C++
|
/*
|
||
|
* By downloading, copying, installing or using the software you agree to this license.
|
||
|
* If you do not agree to this license, do not download, install,
|
||
|
* copy or use the software.
|
||
|
*
|
||
|
*
|
||
|
* License Agreement
|
||
|
* For Open Source Computer Vision Library
|
||
|
* (3-clause BSD License)
|
||
|
*
|
||
|
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
|
||
|
* Third party copyrights are property of their respective owners.
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without modification,
|
||
|
* are permitted provided that the following conditions are met:
|
||
|
*
|
||
|
* * Redistributions of source code must retain the above copyright notice,
|
||
|
* this list of conditions and the following disclaimer.
|
||
|
*
|
||
|
* * Redistributions in binary form must reproduce the above copyright notice,
|
||
|
* this list of conditions and the following disclaimer in the documentation
|
||
|
* and/or other materials provided with the distribution.
|
||
|
*
|
||
|
* * Neither the names of the copyright holders nor the names of the contributors
|
||
|
* may be used to endorse or promote products derived from this software
|
||
|
* without specific prior written permission.
|
||
|
*
|
||
|
* This software is provided by the copyright holders and contributors "as is" and
|
||
|
* any express or implied warranties, including, but not limited to, the implied
|
||
|
* warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||
|
* In no event shall copyright holders or contributors be liable for any direct,
|
||
|
* indirect, incidental, special, exemplary, or consequential damages
|
||
|
* (including, but not limited to, procurement of substitute goods or services;
|
||
|
* loss of use, data, or profits; or business interruption) however caused
|
||
|
* and on any theory of liability, whether in contract, strict liability,
|
||
|
* or tort (including negligence or otherwise) arising in any way out of
|
||
|
* the use of this software, even if advised of the possibility of such damage.
|
||
|
*/
|
||
|
|
||
|
#ifndef CAROTENE_INTRINSICS_HPP
|
||
|
#define CAROTENE_INTRINSICS_HPP
|
||
|
|
||
|
#include <carotene/definitions.hpp>
|
||
|
|
||
|
#include <arm_neon.h>
|
||
|
|
||
|
namespace CAROTENE_NS { namespace internal {
|
||
|
|
||
|
/////////////// Custom NEON intrinsics ///////////////////
|
||
|
|
||
|
// calculate reciprocal value
|
||
|
|
||
|
// Approximates 1/val per lane (4 x float32): starts from the hardware
// reciprocal estimate and applies two refinement steps built from
// vrecpsq_f32.
inline float32x4_t vrecpq_f32(float32x4_t val)
{
    const float32x4_t estimate = vrecpeq_f32(val);
    // First refinement: scale the estimate by the correction factor.
    const float32x4_t refined = vmulq_f32(vrecpsq_f32(val, estimate), estimate);
    // Second refinement, applied to the already refined value.
    return vmulq_f32(vrecpsq_f32(val, refined), refined);
}
|
||
|
|
||
|
// Approximates 1/val per lane (2 x float32): starts from the hardware
// reciprocal estimate and applies two refinement steps built from
// vrecps_f32.
inline float32x2_t vrecp_f32(float32x2_t val)
{
    const float32x2_t estimate = vrecpe_f32(val);
    // First refinement: scale the estimate by the correction factor.
    const float32x2_t refined = vmul_f32(vrecps_f32(val, estimate), estimate);
    // Second refinement, applied to the already refined value.
    return vmul_f32(vrecps_f32(val, refined), refined);
}
|
||
|
|
||
|
// calculate reciprocal sqrt and sqrt values
|
||
|
|
||
|
// Approximates 1/sqrt(val) per lane (4 x float32): starts from the hardware
// reciprocal square-root estimate and applies two refinement steps built
// from vrsqrtsq_f32.
inline float32x4_t vrsqrtq_f32(float32x4_t val)
{
    const float32x4_t estimate = vrsqrteq_f32(val);
    // Each step feeds the squared current estimate and val to vrsqrtsq_f32,
    // then scales the current estimate by the returned correction factor.
    const float32x4_t estimate_sq = vmulq_f32(estimate, estimate);
    const float32x4_t refined = vmulq_f32(vrsqrtsq_f32(estimate_sq, val), estimate);
    const float32x4_t refined_sq = vmulq_f32(refined, refined);
    return vmulq_f32(vrsqrtsq_f32(refined_sq, val), refined);
}
|
||
|
|
||
|
// Approximates 1/sqrt(val) per lane (2 x float32): starts from the hardware
// reciprocal square-root estimate and applies two refinement steps built
// from vrsqrts_f32.
inline float32x2_t vrsqrt_f32(float32x2_t val)
{
    const float32x2_t estimate = vrsqrte_f32(val);
    // Each step feeds the squared current estimate and val to vrsqrts_f32,
    // then scales the current estimate by the returned correction factor.
    const float32x2_t estimate_sq = vmul_f32(estimate, estimate);
    const float32x2_t refined = vmul_f32(vrsqrts_f32(estimate_sq, val), estimate);
    const float32x2_t refined_sq = vmul_f32(refined, refined);
    return vmul_f32(vrsqrts_f32(refined_sq, val), refined);
}
|
||
|
|
||
|
// Approximates sqrt(val) per lane (4 x float32) as the reciprocal of the
// reciprocal square root, using the vrsqrtq_f32 and vrecpq_f32 helpers.
inline float32x4_t vsqrtq_f32(float32x4_t val)
{
    const float32x4_t inv_root = vrsqrtq_f32(val);
    return vrecpq_f32(inv_root);
}
|
||
|
|
||
|
// Approximates sqrt(val) per lane (2 x float32) as the reciprocal of the
// reciprocal square root, using the vrsqrt_f32 and vrecp_f32 helpers.
inline float32x2_t vsqrt_f32(float32x2_t val)
{
    const float32x2_t inv_root = vrsqrt_f32(val);
    return vrecp_f32(inv_root);
}
|
||
|
|
||
|
// table lookup with the table in a 128-bit register
|
||
|
|
||
|
// Byte table lookup: for each lane of b, selects the byte of the 16-byte
// table a at that index.
//   a - 16-byte lookup table held in one 128-bit register
//   b - 8 byte indices into the table
// Returns the 8 selected bytes.
inline uint8x8_t vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
{
#ifdef __aarch64__
    // AArch64 supports this natively
    return ::vqtbl1_u8(a, b);
#else
    // Build the two-register table from the low and high halves explicitly.
    // Reading an inactive union member to reinterpret the 128-bit register
    // is undefined behaviour in C++ and depends on the in-memory lane layout.
    uint8x8x2_t table;
    table.val[0] = vget_low_u8(a);
    table.val[1] = vget_high_u8(a);
    return vtbl2_u8(table, b);
#endif
}
|
||
|
|
||
|
} }
|
||
|
|
||
|
#endif
|