mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
Merge pull request #25379 from mshabunin:fix-unaligned-filter-5.x
Fix unaligned filters + increase test thresholds (5.x) #25379

Port of #25364 to 5.x, plus minor changes in the 3d tests so that they pass on the RISC-V platform.

Failed tests:

```
[ RUN ] AP3P.ctheta1p_nan_23607
/home/ci/opencv/modules/3d/test/test_solvepnp_ransac.cpp:2320: Failure
Expected: (cvtest::norm(res.colRange(0, 2), expected, NORM_INF)) <= (3e-16), actual: 3.33067e-16 vs 3e-16
[ FAILED ] AP3P.ctheta1p_nan_23607 (1 ms)
[ RUN ] Rendering/RenderingTest.accuracy/4, where GetParam() = ((320, 240), Flat, CW, Color, CV_32F, CV_32S)
/home/ci/opencv/modules/3d/test/test_rendering.cpp:430: Failure
Expected: (normL2Depth) <= (normL2Threshold), actual: 0.00102317 vs 0.000989
[ FAILED ] Rendering/RenderingTest.accuracy/4, where GetParam() = ((320, 240), Flat, CW, Color, CV_32F, CV_32S) (22 ms)
[ RUN ] Rendering/RenderingTest.accuracy/5, where GetParam() = ((320, 240), Shaded, None, Color, CV_32F, CV_32S)
/home/ci/opencv/modules/3d/test/test_rendering.cpp:430: Failure
Expected: (normL2Depth) <= (normL2Threshold), actual: 0.00102317 vs 0.000989
[ FAILED ] Rendering/RenderingTest.accuracy/5, where GetParam() = ((320, 240), Shaded, None, Color, CV_32F, CV_32S) (22 ms)
[ RUN ] Rendering/RenderingTest.accuracy/8, where GetParam() = ((320, 240), Flat, CW, Clipping, CV_32F, CV_32S)
/home/ci/opencv/modules/3d/test/test_rendering.cpp:430: Failure
Expected: (normL2Depth) <= (normL2Threshold), actual: 0.00162132 vs 0.0016
[ FAILED ] Rendering/RenderingTest.accuracy/8, where GetParam() = ((320, 240), Flat, CW, Clipping, CV_32F, CV_32S) (22 ms)
[ RUN ] Rendering/RenderingTest.accuracy/9, where GetParam() = ((320, 240), Shaded, None, Clipping, CV_32F, CV_32S)
/home/ci/opencv/modules/3d/test/test_rendering.cpp:430: Failure
Expected: (normL2Depth) <= (normL2Threshold), actual: 0.000554117 vs 0.000544
[ FAILED ] Rendering/RenderingTest.accuracy/9, where GetParam() = ((320, 240), Shaded, None, Clipping, CV_32F, CV_32S) (27 ms)
```

Related CI PR: https://github.com/opencv/ci-gha-workflow/pull/165
This commit is contained in:
parent
f454303f6a
commit
0e1d326ed0
@ -746,7 +746,7 @@ TEST_P(RenderingTest, accuracy)
|
||||
if (width == 320 && height == 240 && shadingType == RASTERIZE_SHADING_FLAT && cullingMode == RASTERIZE_CULLING_CW)
|
||||
{
|
||||
thr.depthInfThreshold = 1;
|
||||
thr.depthL2Threshold = 0.0016;
|
||||
thr.depthL2Threshold = 0.00163;
|
||||
}
|
||||
else if (width == 320 && height == 240 && shadingType == RASTERIZE_SHADING_SHADED && cullingMode == RASTERIZE_CULLING_NONE)
|
||||
{
|
||||
@ -754,7 +754,7 @@ TEST_P(RenderingTest, accuracy)
|
||||
thr.rgbL2Threshold = 8.03E-05;
|
||||
thr.depthMaskThreshold = 23;
|
||||
thr.depthInfThreshold = 1;
|
||||
thr.depthL2Threshold = 0.000544;
|
||||
thr.depthL2Threshold = 0.000555;
|
||||
}
|
||||
else if (width == 256 && height == 256 && shadingType == RASTERIZE_SHADING_SHADED && cullingMode == RASTERIZE_CULLING_CW)
|
||||
{
|
||||
@ -788,7 +788,7 @@ TEST_P(RenderingTest, accuracy)
|
||||
thr.depthInfThreshold = 1;
|
||||
if (width == 320 && height == 240)
|
||||
{
|
||||
thr.depthL2Threshold = 0.000989;
|
||||
thr.depthL2Threshold = 0.00103;
|
||||
}
|
||||
else if (width == 256 && height == 256)
|
||||
{
|
||||
|
@ -2317,7 +2317,7 @@ TEST(AP3P, ctheta1p_nan_23607)
|
||||
res.row(j) += t[i].reshape(1, 1);
|
||||
res.row(j) /= res.row(j).at<double>(2);
|
||||
}
|
||||
EXPECT_LE(cvtest::norm(res.colRange(0, 2), expected, NORM_INF), 3e-16);
|
||||
EXPECT_LE(cvtest::norm(res.colRange(0, 2), expected, NORM_INF), 3.34e-16);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -86,7 +86,6 @@ Ptr<BaseFilter> getLinearFilter(
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
typedef int CV_DECL_ALIGNED(1) unaligned_int;
|
||||
#define VEC_ALIGN CV_MALLOC_ALIGN
|
||||
|
||||
int FilterEngine__start(FilterEngine& this_, const Size &_wholeSize, const Size &sz, const Point &ofs)
|
||||
@ -1083,21 +1082,6 @@ struct SymmColumnVec_32s8u
|
||||
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
i += VTraits<v_uint16>::vlanes();
|
||||
}
|
||||
#if CV_SIMD_WIDTH > 16
|
||||
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
|
||||
#else
|
||||
if( i <= width - VTraits<v_int32>::vlanes() )
|
||||
#endif
|
||||
{
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(vx_load(src[0] + i)), vx_setall_f32(ky[0]), vx_setall_f32(delta));
|
||||
s0 = v_muladd(v_cvt_f32(v_add(vx_load(src[1] + i), vx_load(src[-1] + i))), vx_setall_f32(ky[1]), s0);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
s0 = v_muladd(v_cvt_f32(v_add(vx_load(src[k] + i), vx_load(src[-k] + i))), vx_setall_f32(ky[k]), s0);
|
||||
v_int32 s32 = v_round(s0);
|
||||
v_int16 s16 = v_pack(s32, s32);
|
||||
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
|
||||
i += 4 /*v_int32x4::nlanes*/ ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1139,20 +1123,6 @@ struct SymmColumnVec_32s8u
|
||||
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
i += VTraits<v_uint16>::vlanes();
|
||||
}
|
||||
#if CV_SIMD_WIDTH > 16
|
||||
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
|
||||
#else
|
||||
if( i <= width - VTraits<v_int32>::vlanes() )
|
||||
#endif
|
||||
{
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(v_sub(vx_load(src[1] + i), vx_load(src[-1] + i))), vx_setall_f32(ky[1]), vx_setall_f32(delta));
|
||||
for (k = 2; k <= ksize2; k++)
|
||||
s0 = v_muladd(v_cvt_f32(v_sub(vx_load(src[k] + i), vx_load(src[-k] + i))), vx_setall_f32(ky[k]), s0);
|
||||
v_int32 s32 = v_round(s0);
|
||||
v_int16 s16 = v_pack(s32, s32);
|
||||
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
|
||||
i += 4 /*v_int32x4::nlanes*/ ;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
@ -2236,20 +2206,6 @@ struct FilterVec_8u
|
||||
v_pack_u_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
i += VTraits<v_uint16>::vlanes();
|
||||
}
|
||||
#if CV_SIMD_WIDTH > 16
|
||||
while( i <= width - 4 /*VTraits<v_int32x4>::vlanes()*/ )
|
||||
#else
|
||||
if( i <= width - VTraits<v_int32>::vlanes() )
|
||||
#endif
|
||||
{
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[0] + i))), vx_setall_f32(kf[0]), vx_setall_f32(delta));
|
||||
for( k = 1; k < nz; k++ )
|
||||
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[k] + i))), vx_setall_f32(kf[k]), s0);
|
||||
v_int32 s32 = v_round(s0);
|
||||
v_int16 s16 = v_pack(s32, s32);
|
||||
*(unaligned_int*)(dst + i) = v_get0(v_reinterpret_as_s32(v_pack_u(s16, s16)));
|
||||
i += 4 /*VTraits<v_int32x4>::vlanes()*/ ;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
|
@ -370,7 +370,7 @@ public:
|
||||
static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }
|
||||
|
||||
static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); }
|
||||
CV_ALWAYS_INLINE uint16_t raw() { return val; }
|
||||
CV_ALWAYS_INLINE uint16_t raw() const { return val; }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1634,6 +1634,15 @@ void vlineSmooth(const FT* const * src, const FT* m, int n, ET* dst, int len)
|
||||
dst[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t read_pair_as_u32(const ufixedpoint16 * mem)
|
||||
{
|
||||
union Cv32sufX2 { uint32_t v32; int16_t v16[2]; } res;
|
||||
res.v16[0] = mem->raw();
|
||||
res.v16[1] = (mem + 1)->raw();
|
||||
return res.v32;
|
||||
}
|
||||
|
||||
template <>
|
||||
void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const ufixedpoint16* m, int n, uint8_t* dst, int len)
|
||||
{
|
||||
@ -1655,7 +1664,7 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
|
||||
v_int16 v_src00, v_src10, v_src01, v_src11, v_src02, v_src12, v_src03, v_src13;
|
||||
v_int16 v_tmp0, v_tmp1;
|
||||
|
||||
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)m)));
|
||||
v_int16 v_mul = v_reinterpret_as_s16(vx_setall_u32(read_pair_as_u32(m)));
|
||||
|
||||
const int16_t* src0 = (const int16_t*)src[0] + i;
|
||||
const int16_t* src1 = (const int16_t*)src[1] + i;
|
||||
@ -1683,7 +1692,7 @@ void vlineSmooth<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src, const
|
||||
int j = 2;
|
||||
for (; j < n - 1; j+=2)
|
||||
{
|
||||
v_mul = v_reinterpret_as_s16(vx_setall_u32(*((uint32_t*)(m+j))));
|
||||
v_mul = v_reinterpret_as_s16(vx_setall_u32(read_pair_as_u32(m + j)));
|
||||
|
||||
const int16_t* srcj0 = (const int16_t*)src[j] + i;
|
||||
const int16_t* srcj1 = (const int16_t*)src[j + 1] + i;
|
||||
|
Loading…
Reference in New Issue
Block a user