mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
fix build error on JetsonTK1
* avoid using vld1_f16 and vst1_f16 on the GCC 4 series (Ubuntu 14.04)
* guard correctly with #if
* use static inline
This commit is contained in:
parent
39ebca5dbf
commit
3debc78a5f
@ -4545,6 +4545,16 @@ static short convertFp16SW(float fp32)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if CV_FP16 && (defined __GNUC__) && (defined __arm__ || defined __aarch64__)
|
||||||
|
#if 5 <= __GNUC__
|
||||||
|
// Variant compiled when __GNUC__ >= 5: reads a float16x4_t from the short buffer at p by handing the pointer, cast to const float16_t*, to the vld1_f16 intrinsic.
static inline float16x4_t load_f16(const short* p) { return vld1_f16((const float16_t*)p); }
|
||||||
|
// Variant compiled when __GNUC__ >= 5: writes the float16x4_t v to the short buffer at p by handing the pointer, cast to float16_t*, to the vst1_f16 intrinsic.
static inline void store_f16(short* p, float16x4_t v) { vst1_f16((float16_t*)p, v); }
|
||||||
|
#else
|
||||||
|
// Fallback compiled when __GNUC__ < 5: avoids vld1_f16 by loading an int16x4_t with vld1_s16 and casting the vector to float16x4_t.
// NOTE(review): the cast is presumably a bit-for-bit reinterpretation (GCC vector cast semantics), not a value conversion; confirm on the target toolchain.
static inline float16x4_t load_f16(const short* p) { return (float16x4_t)vld1_s16(p); }
|
||||||
|
// Fallback compiled when __GNUC__ < 5: avoids vst1_f16 by casting v to int16x4_t and storing it to p with vst1_s16.
// NOTE(review): the cast is presumably a bit-for-bit reinterpretation (GCC vector cast semantics), not a value conversion; confirm on the target toolchain.
static inline void store_f16(short* p, float16x4_t v) { vst1_s16(p, (int16x4_t)v); }
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
// template for FP16 HW conversion function
|
// template for FP16 HW conversion function
|
||||||
template<typename T, typename DT> static void
|
template<typename T, typename DT> static void
|
||||||
cvtScaleHalf_( const T* src, size_t sstep, DT* dst, size_t dstep, Size size);
|
cvtScaleHalf_( const T* src, size_t sstep, DT* dst, size_t dstep, Size size);
|
||||||
@ -4579,7 +4589,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
|
|||||||
|
|
||||||
float16x4_t v_dst = vcvt_f16_f32(v_src);
|
float16x4_t v_dst = vcvt_f16_f32(v_src);
|
||||||
|
|
||||||
vst1_f16((float16_t*)(dst + x), v_dst);
|
store_f16(dst + x, v_dst);
|
||||||
#else
|
#else
|
||||||
#error "Configuration error"
|
#error "Configuration error"
|
||||||
#endif
|
#endif
|
||||||
@ -4631,7 +4641,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
|
|||||||
|
|
||||||
_mm_storeu_ps(dst + x, v_dst);
|
_mm_storeu_ps(dst + x, v_dst);
|
||||||
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
||||||
float16x4_t v_src = vld1_f16((float16_t*)(src + x));
|
float16x4_t v_src = load_f16(src+x);
|
||||||
|
|
||||||
float32x4_t v_dst = vcvt_f32_f16(v_src);
|
float32x4_t v_dst = vcvt_f32_f16(v_src);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user