core(SIMD): align behavior of vector constructors

- setzero() calls are dropped from the default constructors due to the low-level nature of this API - callers must initialize a vector explicitly when necessary (i.e. when it is not the output of another call)
2025-06-07 17:44:04 +08:00 · 2020-04-12 18:18:17 +00:00 · 2020-04-12 18:18:17 +00:00 · dcf7eb972e
commit dcf7eb972e
parent 0812207db7
7 changed files with 170 additions and 80 deletions
--- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp
@ -159,7 +159,8 @@ struct v_uint8x32
            (char)v22, (char)v23, (char)v24, (char)v25, (char)v26, (char)v27,
            (char)v28, (char)v29, (char)v30, (char)v31);
    }
-    v_uint8x32() : val(_mm256_setzero_si256()) {}
+    v_uint8x32() {}
+
    uchar get0() const { return (uchar)_v_cvtsi256_si32(val); }
 };

@ -183,7 +184,8 @@ struct v_int8x32
            v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20,
            v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31);
    }
-    v_int8x32() : val(_mm256_setzero_si256()) {}
+    v_int8x32() {}
+
    schar get0() const { return (schar)_v_cvtsi256_si32(val); }
 };

@ -203,7 +205,8 @@ struct v_uint16x16
            (short)v4,  (short)v5,  (short)v6,  (short)v7,  (short)v8,  (short)v9,
            (short)v10, (short)v11, (short)v12, (short)v13, (short)v14, (short)v15);
    }
-    v_uint16x16() : val(_mm256_setzero_si256()) {}
+    v_uint16x16() {}
+
    ushort get0() const { return (ushort)_v_cvtsi256_si32(val); }
 };

@ -222,7 +225,8 @@ struct v_int16x16
        val = _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7,
            v8, v9, v10, v11, v12, v13, v14, v15);
    }
-    v_int16x16() : val(_mm256_setzero_si256()) {}
+    v_int16x16() {}
+
    short get0() const { return (short)_v_cvtsi256_si32(val); }
 };

@ -239,7 +243,8 @@ struct v_uint32x8
        val = _mm256_setr_epi32((unsigned)v0, (unsigned)v1, (unsigned)v2,
            (unsigned)v3, (unsigned)v4, (unsigned)v5, (unsigned)v6, (unsigned)v7);
    }
-    v_uint32x8() : val(_mm256_setzero_si256()) {}
+    v_uint32x8() {}
+
    unsigned get0() const { return (unsigned)_v_cvtsi256_si32(val); }
 };

@ -255,7 +260,8 @@ struct v_int32x8
    {
        val = _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7);
    }
-    v_int32x8() : val(_mm256_setzero_si256()) {}
+    v_int32x8() {}
+
    int get0() const { return _v_cvtsi256_si32(val); }
 };

@ -271,7 +277,8 @@ struct v_float32x8
    {
        val = _mm256_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7);
    }
-    v_float32x8() : val(_mm256_setzero_ps()) {}
+    v_float32x8() {}
+
    float get0() const { return _mm_cvtss_f32(_mm256_castps256_ps128(val)); }
 };

@ -284,7 +291,7 @@ struct v_uint64x4
    explicit v_uint64x4(__m256i v) : val(v) {}
    v_uint64x4(uint64 v0, uint64 v1, uint64 v2, uint64 v3)
    { val = _mm256_setr_epi64x((int64)v0, (int64)v1, (int64)v2, (int64)v3); }
-    v_uint64x4() : val(_mm256_setzero_si256()) {}
+    v_uint64x4() {}
    uint64 get0() const
    {
    #if defined __x86_64__ || defined _M_X64
@ -306,7 +313,7 @@ struct v_int64x4
    explicit v_int64x4(__m256i v) : val(v) {}
    v_int64x4(int64 v0, int64 v1, int64 v2, int64 v3)
    { val = _mm256_setr_epi64x(v0, v1, v2, v3); }
-    v_int64x4() : val(_mm256_setzero_si256()) {}
+    v_int64x4() {}

    int64 get0() const
    {
@ -329,7 +336,8 @@ struct v_float64x4
    explicit v_float64x4(__m256d v) : val(v) {}
    v_float64x4(double v0, double v1, double v2, double v3)
    { val = _mm256_setr_pd(v0, v1, v2, v3); }
-    v_float64x4() : val(_mm256_setzero_pd()) {}
+    v_float64x4() {}
+
    double get0() const { return _mm_cvtsd_f64(_mm256_castpd256_pd128(val)); }
 };

--- a/modules/core/include/opencv2/core/hal/intrin_avx512.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_avx512.hpp
@ -144,7 +144,10 @@ struct v_uint8x64
                             v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
                             v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0);
    }
-    v_uint8x64() : val(_mm512_setzero_si512()) {}
+    v_uint8x64() {}
+
+    static inline v_uint8x64 zero() { return v_uint8x64(_mm512_setzero_si512()); }
+
    uchar get0() const { return (uchar)_v_cvtsi512_si32(val); }
 };

@ -177,7 +180,10 @@ struct v_int8x64
                             v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
                             v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0);
    }
-    v_int8x64() : val(_mm512_setzero_si512()) {}
+    v_int8x64() {}
+
+    static inline v_int8x64 zero() { return v_int8x64(_mm512_setzero_si512()); }
+
    schar get0() const { return (schar)_v_cvtsi512_si32(val); }
 };

@ -200,7 +206,10 @@ struct v_uint16x32
        val = _v512_set_epu16(v31, v30, v29, v28, v27, v26, v25, v24, v23, v22, v21, v20, v19, v18, v17, v16,
                              v15, v14, v13, v12, v11, v10, v9,  v8,  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0);
    }
-    v_uint16x32() : val(_mm512_setzero_si512()) {}
+    v_uint16x32() {}
+
+    static inline v_uint16x32 zero() { return v_uint16x32(_mm512_setzero_si512()); }
+
    ushort get0() const { return (ushort)_v_cvtsi512_si32(val); }
 };

@ -221,7 +230,10 @@ struct v_int16x32
                              (ushort)v15, (ushort)v14, (ushort)v13, (ushort)v12, (ushort)v11, (ushort)v10, (ushort)v9 , (ushort)v8,
                              (ushort)v7 , (ushort)v6 , (ushort)v5 , (ushort)v4 , (ushort)v3 , (ushort)v2 , (ushort)v1 , (ushort)v0);
    }
-    v_int16x32() : val(_mm512_setzero_si512()) {}
+    v_int16x32() {}
+
+    static inline v_int16x32 zero() { return v_int16x32(_mm512_setzero_si512()); }
+
    short get0() const { return (short)_v_cvtsi512_si32(val); }
 };

@ -240,7 +252,10 @@ struct v_uint32x16
        val = _mm512_setr_epi32((int)v0,  (int)v1,  (int)v2,  (int)v3, (int)v4,  (int)v5,  (int)v6,  (int)v7,
                                (int)v8,  (int)v9,  (int)v10, (int)v11, (int)v12, (int)v13, (int)v14, (int)v15);
    }
-    v_uint32x16() : val(_mm512_setzero_si512()) {}
+    v_uint32x16() {}
+
+    static inline v_uint32x16 zero() { return v_uint32x16(_mm512_setzero_si512()); }
+
    unsigned get0() const { return (unsigned)_v_cvtsi512_si32(val); }
 };

@ -256,7 +271,10 @@ struct v_int32x16
    {
        val = _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15);
    }
-    v_int32x16() : val(_mm512_setzero_si512()) {}
+    v_int32x16() {}
+
+    static inline v_int32x16 zero() { return v_int32x16(_mm512_setzero_si512()); }
+
    int get0() const { return _v_cvtsi512_si32(val); }
 };

@ -272,7 +290,10 @@ struct v_float32x16
    {
        val = _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15);
    }
-    v_float32x16() : val(_mm512_setzero_ps()) {}
+    v_float32x16() {}
+
+    static inline v_float32x16 zero() { return v_float32x16(_mm512_setzero_ps()); }
+
    float get0() const { return _mm_cvtss_f32(_mm512_castps512_ps128(val)); }
 };

@ -285,7 +306,10 @@ struct v_uint64x8
    explicit v_uint64x8(__m512i v) : val(v) {}
    v_uint64x8(uint64 v0, uint64 v1, uint64 v2, uint64 v3, uint64 v4, uint64 v5, uint64 v6, uint64 v7)
    { val = _mm512_setr_epi64((int64)v0, (int64)v1, (int64)v2, (int64)v3, (int64)v4, (int64)v5, (int64)v6, (int64)v7); }
-    v_uint64x8() : val(_mm512_setzero_si512()) {}
+    v_uint64x8() {}
+
+    static inline v_uint64x8 zero() { return v_uint64x8(_mm512_setzero_si512()); }
+
    uint64 get0() const
    {
    #if defined __x86_64__ || defined _M_X64
@ -307,7 +331,9 @@ struct v_int64x8
    explicit v_int64x8(__m512i v) : val(v) {}
    v_int64x8(int64 v0, int64 v1, int64 v2, int64 v3, int64 v4, int64 v5, int64 v6, int64 v7)
    { val = _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7); }
-    v_int64x8() : val(_mm512_setzero_si512()) {}
+    v_int64x8() {}
+
+    static inline v_int64x8 zero() { return v_int64x8(_mm512_setzero_si512()); }

    int64 get0() const
    {
@ -330,7 +356,10 @@ struct v_float64x8
    explicit v_float64x8(__m512d v) : val(v) {}
    v_float64x8(double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7)
    { val = _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7); }
-    v_float64x8() : val(_mm512_setzero_pd()) {}
+    v_float64x8() {}
+
+    static inline v_float64x8 zero() { return v_float64x8(_mm512_setzero_pd()); }
+
    double get0() const { return _mm_cvtsd_f64(_mm512_castpd512_pd128(val)); }
 };

@ -1030,7 +1059,7 @@ inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
    enum { MASK = ((1 << _Tpvec::nlanes) - 1) };                                                                                           \
    if (imm == 0) return a;                                                                                                                \
    if (imm == _Tpvec::nlanes) return b;                                                                                                   \
-    if (imm >= 2*_Tpvec::nlanes) return _Tpvec();                                                                                          \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero();                                                                                    \
    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << SHIFT2)&MASK, b.val), (MASK << (imm))&MASK, a.val)); \
 }                                                                                                                                          \
 template<int imm>                                                                                                                          \
@ -1040,21 +1069,21 @@ inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
    enum { MASK = ((1 << _Tpvec::nlanes) - 1) };                                                                                           \
    if (imm == 0) return a;                                                                                                                \
    if (imm == _Tpvec::nlanes) return b;                                                                                                   \
-    if (imm >= 2*_Tpvec::nlanes) return _Tpvec();                                                                                          \
+    if (imm >= 2*_Tpvec::nlanes) return _Tpvec::zero();                                                                                    \
    return _Tpvec(_mm512_mask_expand_##suffix(_mm512_maskz_compress_##suffix((MASK << (imm))&MASK, a.val), (MASK << SHIFT2)&MASK, b.val)); \
 }                                                                                                                                          \
 template<int imm>                                                                                                                          \
 inline _Tpvec v_rotate_left(const _Tpvec& a)                                                                                               \
 {                                                                                                                                          \
    if (imm == 0) return a;                                                                                                                \
-    if (imm >= _Tpvec::nlanes) return _Tpvec();                                                                                            \
+    if (imm >= _Tpvec::nlanes) return _Tpvec::zero();                                                                                      \
    return _Tpvec(_mm512_maskz_expand_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val));                                              \
 }                                                                                                                                          \
 template<int imm>                                                                                                                          \
 inline _Tpvec v_rotate_right(const _Tpvec& a)                                                                                              \
 {                                                                                                                                          \
    if (imm == 0) return a;                                                                                                                \
-    if (imm >= _Tpvec::nlanes) return _Tpvec();                                                                                            \
+    if (imm >= _Tpvec::nlanes) return _Tpvec::zero();                                                                                      \
    return _Tpvec(_mm512_maskz_compress_##suffix((1 << _Tpvec::nlanes) - (1 << (imm)), a.val));                                            \
 }

--- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp
@ -25,7 +25,7 @@ struct v_uint8x16
    typedef uchar lane_type;
    enum { nlanes = 16 };

-    v_uint8x16() : val(msa_dupq_n_u8(0)) {}
+    v_uint8x16() {}
    explicit v_uint8x16(v16u8 v) : val(v) {}
    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
@ -33,6 +33,7 @@ struct v_uint8x16
        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        val = msa_ld1q_u8(v);
    }
+
    uchar get0() const
    {
        return msa_getq_lane_u8(val, 0);
@ -46,7 +47,7 @@ struct v_int8x16
    typedef schar lane_type;
    enum { nlanes = 16 };

-    v_int8x16() : val(msa_dupq_n_s8(0)) {}
+    v_int8x16() {}
    explicit v_int8x16(v16i8 v) : val(v) {}
    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
               schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
@ -54,6 +55,7 @@ struct v_int8x16
        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        val = msa_ld1q_s8(v);
    }
+
    schar get0() const
    {
        return msa_getq_lane_s8(val, 0);
@ -67,13 +69,14 @@ struct v_uint16x8
    typedef ushort lane_type;
    enum { nlanes = 8 };

-    v_uint16x8() : val(msa_dupq_n_u16(0)) {}
+    v_uint16x8() {}
    explicit v_uint16x8(v8u16 v) : val(v) {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
    {
        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        val = msa_ld1q_u16(v);
    }
+
    ushort get0() const
    {
        return msa_getq_lane_u16(val, 0);
@ -87,13 +90,14 @@ struct v_int16x8
    typedef short lane_type;
    enum { nlanes = 8 };

-    v_int16x8() : val(msa_dupq_n_s16(0)) {}
+    v_int16x8() {}
    explicit v_int16x8(v8i16 v) : val(v) {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
    {
        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        val = msa_ld1q_s16(v);
    }
+
    short get0() const
    {
        return msa_getq_lane_s16(val, 0);
@ -107,13 +111,14 @@ struct v_uint32x4
    typedef unsigned int lane_type;
    enum { nlanes = 4 };

-    v_uint32x4() : val(msa_dupq_n_u32(0)) {}
+    v_uint32x4() {}
    explicit v_uint32x4(v4u32 v) : val(v) {}
    v_uint32x4(unsigned int v0, unsigned int v1, unsigned int v2, unsigned int v3)
    {
        unsigned int v[] = {v0, v1, v2, v3};
        val = msa_ld1q_u32(v);
    }
+
    unsigned int get0() const
    {
        return msa_getq_lane_u32(val, 0);
@ -127,17 +132,19 @@ struct v_int32x4
    typedef int lane_type;
    enum { nlanes = 4 };

-    v_int32x4() : val(msa_dupq_n_s32(0)) {}
+    v_int32x4() {}
    explicit v_int32x4(v4i32 v) : val(v) {}
    v_int32x4(int v0, int v1, int v2, int v3)
    {
        int v[] = {v0, v1, v2, v3};
        val = msa_ld1q_s32(v);
    }
+
    int get0() const
    {
        return msa_getq_lane_s32(val, 0);
    }
+
    v4i32 val;
 };

@ -146,17 +153,19 @@ struct v_float32x4
    typedef float lane_type;
    enum { nlanes = 4 };

-    v_float32x4() : val(msa_dupq_n_f32(0.0f)) {}
+    v_float32x4() {}
    explicit v_float32x4(v4f32 v) : val(v) {}
    v_float32x4(float v0, float v1, float v2, float v3)
    {
        float v[] = {v0, v1, v2, v3};
        val = msa_ld1q_f32(v);
    }
+
    float get0() const
    {
        return msa_getq_lane_f32(val, 0);
    }
+
    v4f32 val;
 };

@ -165,17 +174,19 @@ struct v_uint64x2
    typedef uint64 lane_type;
    enum { nlanes = 2 };

-    v_uint64x2() : val(msa_dupq_n_u64(0)) {}
+    v_uint64x2() {}
    explicit v_uint64x2(v2u64 v) : val(v) {}
    v_uint64x2(uint64 v0, uint64 v1)
    {
        uint64 v[] = {v0, v1};
        val = msa_ld1q_u64(v);
    }
+
    uint64 get0() const
    {
        return msa_getq_lane_u64(val, 0);
    }
+
    v2u64 val;
 };

@ -184,17 +195,19 @@ struct v_int64x2
    typedef int64 lane_type;
    enum { nlanes = 2 };

-    v_int64x2() : val(msa_dupq_n_s64(0)) {}
+    v_int64x2() {}
    explicit v_int64x2(v2i64 v) : val(v) {}
    v_int64x2(int64 v0, int64 v1)
    {
        int64 v[] = {v0, v1};
        val = msa_ld1q_s64(v);
    }
+
    int64 get0() const
    {
        return msa_getq_lane_s64(val, 0);
    }
+
    v2i64 val;
 };

@ -203,17 +216,19 @@ struct v_float64x2
    typedef double lane_type;
    enum { nlanes = 2 };

-    v_float64x2() : val(msa_dupq_n_f64(0.0f)) {}
+    v_float64x2() {}
    explicit v_float64x2(v2f64 v) : val(v) {}
    v_float64x2(double v0, double v1)
    {
        double v[] = {v0, v1};
        val = msa_ld1q_f64(v);
    }
+
    double get0() const
    {
        return msa_getq_lane_f64(val, 0);
    }
+
    v2f64 val;
 };

--- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp
@ -75,7 +75,7 @@ struct v_uint8x16
    typedef __m128i vector_type;
    enum { nlanes = 16 };

-    v_uint8x16() : val(_mm_setzero_si128()) {}
+    v_uint8x16() {}
    explicit v_uint8x16(__m128i v) : val(v) {}
    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
@ -85,6 +85,7 @@ struct v_uint8x16
                            (char)v8, (char)v9, (char)v10, (char)v11,
                            (char)v12, (char)v13, (char)v14, (char)v15);
    }
+
    uchar get0() const
    {
        return (uchar)_mm_cvtsi128_si32(val);
@ -99,7 +100,7 @@ struct v_int8x16
    typedef __m128i vector_type;
    enum { nlanes = 16 };

-    v_int8x16() : val(_mm_setzero_si128()) {}
+    v_int8x16() {}
    explicit v_int8x16(__m128i v) : val(v) {}
    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
@ -109,6 +110,7 @@ struct v_int8x16
                            (char)v8, (char)v9, (char)v10, (char)v11,
                            (char)v12, (char)v13, (char)v14, (char)v15);
    }
+
    schar get0() const
    {
        return (schar)_mm_cvtsi128_si32(val);
@ -123,13 +125,14 @@ struct v_uint16x8
    typedef __m128i vector_type;
    enum { nlanes = 8 };

-    v_uint16x8() : val(_mm_setzero_si128()) {}
+    v_uint16x8() {}
    explicit v_uint16x8(__m128i v) : val(v) {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
    {
        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
                             (short)v4, (short)v5, (short)v6, (short)v7);
    }
+
    ushort get0() const
    {
        return (ushort)_mm_cvtsi128_si32(val);
@ -144,13 +147,14 @@ struct v_int16x8
    typedef __m128i vector_type;
    enum { nlanes = 8 };

-    v_int16x8() : val(_mm_setzero_si128()) {}
+    v_int16x8() {}
    explicit v_int16x8(__m128i v) : val(v) {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
    {
        val = _mm_setr_epi16((short)v0, (short)v1, (short)v2, (short)v3,
                             (short)v4, (short)v5, (short)v6, (short)v7);
    }
+
    short get0() const
    {
        return (short)_mm_cvtsi128_si32(val);
@ -165,12 +169,13 @@ struct v_uint32x4
    typedef __m128i vector_type;
    enum { nlanes = 4 };

-    v_uint32x4() : val(_mm_setzero_si128()) {}
+    v_uint32x4() {}
    explicit v_uint32x4(__m128i v) : val(v) {}
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
    {
        val = _mm_setr_epi32((int)v0, (int)v1, (int)v2, (int)v3);
    }
+
    unsigned get0() const
    {
        return (unsigned)_mm_cvtsi128_si32(val);
@ -185,12 +190,13 @@ struct v_int32x4
    typedef __m128i vector_type;
    enum { nlanes = 4 };

-    v_int32x4() : val(_mm_setzero_si128()) {}
+    v_int32x4() {}
    explicit v_int32x4(__m128i v) : val(v) {}
    v_int32x4(int v0, int v1, int v2, int v3)
    {
        val = _mm_setr_epi32(v0, v1, v2, v3);
    }
+
    int get0() const
    {
        return _mm_cvtsi128_si32(val);
@ -205,12 +211,13 @@ struct v_float32x4
    typedef __m128 vector_type;
    enum { nlanes = 4 };

-    v_float32x4() : val(_mm_setzero_ps()) {}
+    v_float32x4() {}
    explicit v_float32x4(__m128 v) : val(v) {}
    v_float32x4(float v0, float v1, float v2, float v3)
    {
        val = _mm_setr_ps(v0, v1, v2, v3);
    }
+
    float get0() const
    {
        return _mm_cvtss_f32(val);
@ -225,12 +232,13 @@ struct v_uint64x2
    typedef __m128i vector_type;
    enum { nlanes = 2 };

-    v_uint64x2() : val(_mm_setzero_si128()) {}
+    v_uint64x2() {}
    explicit v_uint64x2(__m128i v) : val(v) {}
    v_uint64x2(uint64 v0, uint64 v1)
    {
        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
    }
+
    uint64 get0() const
    {
    #if !defined(__x86_64__) && !defined(_M_X64)
@ -251,12 +259,13 @@ struct v_int64x2
    typedef __m128i vector_type;
    enum { nlanes = 2 };

-    v_int64x2() : val(_mm_setzero_si128()) {}
+    v_int64x2() {}
    explicit v_int64x2(__m128i v) : val(v) {}
    v_int64x2(int64 v0, int64 v1)
    {
        val = _mm_setr_epi32((int)v0, (int)(v0 >> 32), (int)v1, (int)(v1 >> 32));
    }
+
    int64 get0() const
    {
    #if !defined(__x86_64__) && !defined(_M_X64)
@ -277,12 +286,13 @@ struct v_float64x2
    typedef __m128d vector_type;
    enum { nlanes = 2 };

-    v_float64x2() : val(_mm_setzero_pd()) {}
+    v_float64x2() {}
    explicit v_float64x2(__m128d v) : val(v) {}
    v_float64x2(double v0, double v1)
    {
        val = _mm_setr_pd(v0, v1);
    }
+
    double get0() const
    {
        return _mm_cvtsd_f64(val);
--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
@ -28,7 +28,7 @@ struct v_uint8x16

    explicit v_uint8x16(const vec_uchar16& v) : val(v)
    {}
-    v_uint8x16() : val(vec_uchar16_z)
+    v_uint8x16()
    {}
    v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
    {}
@ -36,6 +36,9 @@ struct v_uint8x16
               uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
        : val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
    {}
+
+    static inline v_uint8x16 zero() { return v_uint8x16(vec_uchar16_z); }
+
    uchar get0() const
    { return vec_extract(val, 0); }
 };
@ -48,7 +51,7 @@ struct v_int8x16

    explicit v_int8x16(const vec_char16& v) : val(v)
    {}
-    v_int8x16() : val(vec_char16_z)
+    v_int8x16()
    {}
    v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
    {}
@ -56,6 +59,9 @@ struct v_int8x16
              schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
        : val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
    {}
+
+    static inline v_int8x16 zero() { return v_int8x16(vec_char16_z); }
+
    schar get0() const
    { return vec_extract(val, 0); }
 };
@ -68,13 +74,16 @@ struct v_uint16x8

    explicit v_uint16x8(const vec_ushort8& v) : val(v)
    {}
-    v_uint16x8() : val(vec_ushort8_z)
+    v_uint16x8()
    {}
    v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
    {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
        : val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
    {}
+
+    static inline v_uint16x8 zero() { return v_uint16x8(vec_ushort8_z); }
+
    ushort get0() const
    { return vec_extract(val, 0); }
 };
@ -87,13 +96,16 @@ struct v_int16x8

    explicit v_int16x8(const vec_short8& v) : val(v)
    {}
-    v_int16x8() : val(vec_short8_z)
+    v_int16x8()
    {}
    v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
    {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
        : val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
    {}
+
+    static inline v_int16x8 zero() { return v_int16x8(vec_short8_z); }
+
    short get0() const
    { return vec_extract(val, 0); }
 };
@ -106,12 +118,15 @@ struct v_uint32x4

    explicit v_uint32x4(const vec_uint4& v) : val(v)
    {}
-    v_uint32x4() : val(vec_uint4_z)
+    v_uint32x4()
    {}
    v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
    {}
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
    {}
+
+    static inline v_uint32x4 zero() { return v_uint32x4(vec_uint4_z); }
+
    uint get0() const
    { return vec_extract(val, 0); }
 };
@ -124,12 +139,15 @@ struct v_int32x4

    explicit v_int32x4(const vec_int4& v) : val(v)
    {}
-    v_int32x4() : val(vec_int4_z)
+    v_int32x4()
    {}
    v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
    {}
    v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
    {}
+
+    static inline v_int32x4 zero() { return v_int32x4(vec_int4_z); }
+
    int get0() const
    { return vec_extract(val, 0); }
 };
@ -142,12 +160,15 @@ struct v_float32x4

    explicit v_float32x4(const vec_float4& v) : val(v)
    {}
-    v_float32x4() : val(vec_float4_z)
+    v_float32x4()
    {}
    v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
    {}
    v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
    {}
+
+    static inline v_float32x4 zero() { return v_float32x4(vec_float4_z); }
+
    float get0() const
    { return vec_extract(val, 0); }
 };
@ -160,12 +181,15 @@ struct v_uint64x2

    explicit v_uint64x2(const vec_udword2& v) : val(v)
    {}
-    v_uint64x2() : val(vec_udword2_z)
+    v_uint64x2()
    {}
    v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
    {}
    v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
    {}
+
+    static inline v_uint64x2 zero() { return v_uint64x2(vec_udword2_z); }
+
    uint64 get0() const
    { return vec_extract(val, 0); }
 };
@ -178,12 +202,15 @@ struct v_int64x2

    explicit v_int64x2(const vec_dword2& v) : val(v)
    {}
-    v_int64x2() : val(vec_dword2_z)
+    v_int64x2()
    {}
    v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
    {}
    v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
    {}
+
+    static inline v_int64x2 zero() { return v_int64x2(vec_dword2_z); }
+
    int64 get0() const
    { return vec_extract(val, 0); }
 };
@ -196,12 +223,15 @@ struct v_float64x2

    explicit v_float64x2(const vec_double2& v) : val(v)
    {}
-    v_float64x2() : val(vec_double2_z)
+    v_float64x2()
    {}
    v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
    {}
    v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
    {}
+
+    static inline v_float64x2 zero() { return v_float64x2(vec_double2_z); }
+
    double get0() const
    { return vec_extract(val, 0); }
 };
@ -229,7 +259,7 @@ OPENCV_HAL_IMPL_VSX_EXTRACT_N(v_float64x2, double)
 * if vec_xxx_c defined as C++ cast, clang-5 will pass it
 */
 #define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast)                        \
-inline _Tpvec v_setzero_##suffix() { return _Tpvec(); }                               \
+inline _Tpvec v_setzero_##suffix() { return _Tpvec(vec_splats((_Tp)0)); }             \
 inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));}          \
 template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a)  \
 { return _Tpvec((cast)a.val); }
@ -660,7 +690,7 @@ inline _Tpvec v_rotate_##suffix(const _Tpvec& a)
 {                                                                               \
    const int wd = imm * sizeof(typename _Tpvec::lane_type);                    \
    if (wd > 15)                                                                \
-        return _Tpvec();                                                        \
+        return _Tpvec::zero();                                                  \
    return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3)));    \
 }

@ -973,7 +1003,7 @@ inline int v_scan_forward(const v_float64x2& a) { return trailingZeros32(v_signm

 template<typename _Tpvec>
 inline bool v_check_all(const _Tpvec& a)
-{ return vec_all_lt(a.val, _Tpvec().val); }
+{ return vec_all_lt(a.val, _Tpvec::zero().val); }
 inline bool v_check_all(const v_uint8x16& a)
 { return v_check_all(v_reinterpret_as_s8(a)); }
 inline bool v_check_all(const v_uint16x8& a)
@ -989,7 +1019,7 @@ inline bool v_check_all(const v_float64x2& a)

 template<typename _Tpvec>
 inline bool v_check_any(const _Tpvec& a)
-{ return vec_any_lt(a.val, _Tpvec().val); }
+{ return vec_any_lt(a.val, _Tpvec::zero().val); }
 inline bool v_check_any(const v_uint8x16& a)
 { return v_check_any(v_reinterpret_as_s8(a)); }
 inline bool v_check_any(const v_uint16x8& a)
--- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
@ -41,7 +41,7 @@ struct v_uint8x16
    typedef v128_t vector_type;
    enum { nlanes = 16 };

-    v_uint8x16() : val(wasm_i8x16_splat(0)) {}
+    v_uint8x16() {}
    explicit v_uint8x16(v128_t v) : val(v) {}
    v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
            uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
@ -49,6 +49,7 @@ struct v_uint8x16
        uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        val = wasm_v128_load(v);
    }
+
    uchar get0() const
    {
        return (uchar)wasm_i8x16_extract_lane(val, 0);
@ -63,7 +64,7 @@ struct v_int8x16
    typedef v128_t vector_type;
    enum { nlanes = 16 };

-    v_int8x16() : val(wasm_i8x16_splat(0)) {}
+    v_int8x16() {}
    explicit v_int8x16(v128_t v) : val(v) {}
    v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
            schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
@ -71,6 +72,7 @@ struct v_int8x16
        schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
        val = wasm_v128_load(v);
    }
+
    schar get0() const
    {
        return wasm_i8x16_extract_lane(val, 0);
@ -85,13 +87,14 @@ struct v_uint16x8
    typedef v128_t vector_type;
    enum { nlanes = 8 };

-    v_uint16x8() : val(wasm_i16x8_splat(0)) {}
+    v_uint16x8() {}
    explicit v_uint16x8(v128_t v) : val(v) {}
    v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
    {
        ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        val = wasm_v128_load(v);
    }
+
    ushort get0() const
    {
        return (ushort)wasm_i16x8_extract_lane(val, 0);    // wasm_u16x8_extract_lane() unimplemented yet
@ -106,13 +109,14 @@ struct v_int16x8
    typedef v128_t vector_type;
    enum { nlanes = 8 };

-    v_int16x8() : val(wasm_i16x8_splat(0)) {}
+    v_int16x8() {}
    explicit v_int16x8(v128_t v) : val(v) {}
    v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
    {
        short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
        val = wasm_v128_load(v);
    }
+
    short get0() const
    {
        return wasm_i16x8_extract_lane(val, 0);
@ -127,13 +131,14 @@ struct v_uint32x4
    typedef v128_t vector_type;
    enum { nlanes = 4 };

-    v_uint32x4() : val(wasm_i32x4_splat(0)) {}
+    v_uint32x4() {}
    explicit v_uint32x4(v128_t v) : val(v) {}
    v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
    {
        unsigned v[] = {v0, v1, v2, v3};
        val = wasm_v128_load(v);
    }
+
    unsigned get0() const
    {
        return (unsigned)wasm_i32x4_extract_lane(val, 0);
@ -148,13 +153,14 @@ struct v_int32x4
    typedef v128_t vector_type;
    enum { nlanes = 4 };

-    v_int32x4() : val(wasm_i32x4_splat(0)) {}
+    v_int32x4() {}
    explicit v_int32x4(v128_t v) : val(v) {}
    v_int32x4(int v0, int v1, int v2, int v3)
    {
        int v[] = {v0, v1, v2, v3};
        val = wasm_v128_load(v);
    }
+
    int get0() const
    {
        return wasm_i32x4_extract_lane(val, 0);
@ -169,13 +175,14 @@ struct v_float32x4
    typedef v128_t vector_type;
    enum { nlanes = 4 };

-    v_float32x4() : val(wasm_f32x4_splat(0)) {}
+    v_float32x4() {}
    explicit v_float32x4(v128_t v) : val(v) {}
    v_float32x4(float v0, float v1, float v2, float v3)
    {
        float v[] = {v0, v1, v2, v3};
        val = wasm_v128_load(v);
    }
+
    float get0() const
    {
        return wasm_f32x4_extract_lane(val, 0);
@ -190,17 +197,14 @@ struct v_uint64x2
    typedef v128_t vector_type;
    enum { nlanes = 2 };

-#ifdef __wasm_unimplemented_simd128__
-    v_uint64x2() : val(wasm_i64x2_splat(0)) {}
-#else
-    v_uint64x2() : val(wasm_i32x4_splat(0)) {}
-#endif
+    v_uint64x2() {}
    explicit v_uint64x2(v128_t v) : val(v) {}
    v_uint64x2(uint64 v0, uint64 v1)
    {
        uint64 v[] = {v0, v1};
        val = wasm_v128_load(v);
    }
+
    uint64 get0() const
    {
 #ifdef __wasm_unimplemented_simd128__
@ -221,17 +225,14 @@ struct v_int64x2
    typedef v128_t vector_type;
    enum { nlanes = 2 };

-#ifdef __wasm_unimplemented_simd128__
-    v_int64x2() : val(wasm_i64x2_splat(0)) {}
-#else
-    v_int64x2() : val(wasm_i32x4_splat(0)) {}
-#endif
+    v_int64x2() {}
    explicit v_int64x2(v128_t v) : val(v) {}
    v_int64x2(int64 v0, int64 v1)
    {
        int64 v[] = {v0, v1};
        val = wasm_v128_load(v);
    }
+
    int64 get0() const
    {
 #ifdef __wasm_unimplemented_simd128__
@ -252,17 +253,14 @@ struct v_float64x2
    typedef v128_t vector_type;
    enum { nlanes = 2 };

-#ifdef __wasm_unimplemented_simd128__
-    v_float64x2() : val(wasm_f64x2_splat(0)) {}
-#else
-    v_float64x2() : val(wasm_f32x4_splat(0)) {}
-#endif
+    v_float64x2() {}
    explicit v_float64x2(v128_t v) : val(v) {}
    v_float64x2(double v0, double v1)
    {
        double v[] = {v0, v1};
        val = wasm_v128_load(v);
    }
+
    double get0() const
    {
 #ifdef __wasm_unimplemented_simd128__
--- a/modules/core/include/opencv2/core/hal/msa_macros.h
+++ b/modules/core/include/opencv2/core/hal/msa_macros.h