opencv/modules/core/src/convert.cpp

// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
namespace cv {
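// Cvt_SIMD is a vectorized conversion functor: the generic template converts nothing and
// returns 0, while the specializations below use universal intrinsics (v_load / v_expand /
// v_pack / v_cvt_*) to convert as many elements as possible and return how many were
// processed, so that cvt_() can finish the remaining tail with scalar code.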
template <typename T, typename DT>
struct Cvt_SIMD
{
int operator() (const T *, DT *, int) const
{
return 0;
}
};
#if CV_SIMD128
// from uchar
template <>
struct Cvt_SIMD<uchar, schar>
{
int operator() (const uchar * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
{
v_int16x8 v_src = v_reinterpret_as_s16(v_load_expand(src + x));
v_store_low(dst + x, v_pack(v_src, v_src));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<uchar, ushort>
{
int operator() (const uchar * src, ushort * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
v_store(dst + x, v_load_expand(src + x));
}
return x;
}
};
template <>
struct Cvt_SIMD<uchar, short>
{
int operator() (const uchar * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
{
v_int16x8 v_src = v_reinterpret_as_s16(v_load_expand(src + x));
v_store(dst + x, v_src);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<uchar, int>
{
int operator() (const uchar * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src = v_load_expand(src + x);
v_uint32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_reinterpret_as_s32(v_src1));
v_store(dst + x + cWidth, v_reinterpret_as_s32(v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<uchar, float>
{
int operator() (const uchar * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src = v_load_expand(src + x);
v_uint32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_cvt_f32(v_reinterpret_as_s32(v_src1)));
v_store(dst + x + cWidth, v_cvt_f32(v_reinterpret_as_s32(v_src2)));
}
}
return x;
}
};
// from schar
template <>
struct Cvt_SIMD<schar, uchar>
{
int operator() (const schar * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
v_pack_u_store(dst + x, v_load_expand(src + x));
}
return x;
}
};
template <>
struct Cvt_SIMD<schar, short>
{
int operator() (const schar * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
v_store(dst + x, v_load_expand(src + x));
}
return x;
}
};
template <>
struct Cvt_SIMD<schar, ushort>
{
int operator() (const schar * src, ushort * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
{
v_int16x8 v_src = v_load_expand(src + x);
v_int32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_pack_u(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<schar, int>
{
int operator() (const schar * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src = v_load_expand(src + x);
v_int32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_src1);
v_store(dst + x + cWidth, v_src2);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<schar, float>
{
int operator() (const schar * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src = v_load_expand(src + x);
v_int32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_cvt_f32(v_src1));
v_store(dst + x + cWidth, v_cvt_f32(v_src2));
}
}
return x;
}
};
// from ushort
template <>
struct Cvt_SIMD<ushort, uchar>
{
int operator() (const ushort * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_store(dst + x, v_pack(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<ushort, schar>
{
int operator() (const ushort * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_uint32x4 v_dst10, v_dst11, v_dst20, v_dst21;
v_expand(v_src1, v_dst10, v_dst11);
v_expand(v_src2, v_dst20, v_dst21);
v_store(dst + x, v_pack(
v_pack(v_reinterpret_as_s32(v_dst10), v_reinterpret_as_s32(v_dst11)),
v_pack(v_reinterpret_as_s32(v_dst20), v_reinterpret_as_s32(v_dst21))));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<ushort, short>
{
int operator() (const ushort * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_uint16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
{
v_uint16x8 v_src = v_load(src + x);
v_uint32x4 v_dst0, v_dst1;
v_expand(v_src, v_dst0, v_dst1);
v_store(dst + x, v_pack(v_reinterpret_as_s32(v_dst0), v_reinterpret_as_s32(v_dst1)));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<ushort, int>
{
int operator() (const ushort * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src = v_load(src + x);
v_uint32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_reinterpret_as_s32(v_src1));
v_store(dst + x + cWidth, v_reinterpret_as_s32(v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<ushort, float>
{
int operator() (const ushort * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint16x8 v_src = v_load(src + x);
v_uint32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_cvt_f32(v_reinterpret_as_s32(v_src1)));
v_store(dst + x + cWidth, v_cvt_f32(v_reinterpret_as_s32(v_src2)));
}
}
return x;
}
};
// from short
template <>
struct Cvt_SIMD<short, uchar>
{
int operator() (const short * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_store(dst + x, v_pack_u(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<short, schar>
{
int operator() (const short * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_store(dst + x, v_pack(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<short, ushort>
{
int operator() (const short * src, ushort * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int16x8::nlanes;
for (; x <= width - cWidth; x += cWidth)
{
v_int16x8 v_src = v_load(src + x);
v_int32x4 v_dst1, v_dst2;
v_expand(v_src, v_dst1, v_dst2);
v_store(dst + x, v_pack_u(v_dst1, v_dst2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<short, int>
{
int operator() (const short * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src = v_load(src + x);
v_int32x4 v_dst1, v_dst2;
v_expand(v_src, v_dst1, v_dst2);
v_store(dst + x, v_dst1);
v_store(dst + x + cWidth, v_dst2);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<short, float>
{
int operator() (const short * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int16x8 v_src = v_load(src + x);
v_int32x4 v_dst1, v_dst2;
v_expand(v_src, v_dst1, v_dst2);
v_store(dst + x, v_cvt_f32(v_dst1));
v_store(dst + x + cWidth, v_cvt_f32(v_dst2));
}
}
return x;
}
};
// from int
template <>
struct Cvt_SIMD<int, uchar>
{
int operator() (const int * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_int32x4 v_src3 = v_load(src + x + cWidth * 2), v_src4 = v_load(src + x + cWidth * 3);
v_uint16x8 v_dst1 = v_pack_u(v_src1, v_src2);
v_uint16x8 v_dst2 = v_pack_u(v_src3, v_src4);
v_store(dst + x, v_pack(v_dst1, v_dst2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<int, schar>
{
int operator() (const int * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_int32x4 v_src3 = v_load(src + x + cWidth * 2), v_src4 = v_load(src + x + cWidth * 3);
v_int16x8 v_dst1 = v_pack(v_src1, v_src2);
v_int16x8 v_dst2 = v_pack(v_src3, v_src4);
v_store(dst + x, v_pack(v_dst1, v_dst2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<int, ushort>
{
int operator() (const int * src, ushort * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_store(dst + x, v_pack_u(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<int, short>
{
int operator() (const int * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src1 = v_load(src + x), v_src2 = v_load(src + x + cWidth);
v_store(dst + x, v_pack(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<int, float>
{
int operator() (const int * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_int32x4::nlanes;
for (; x <= width - cWidth; x += cWidth)
v_store(dst + x, v_cvt_f32(v_load(src + x)));
}
return x;
}
};
// from float
template <>
struct Cvt_SIMD<float, uchar>
{
int operator() (const float * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_int32x4 v_src1 = v_round(v_load(src + x));
v_int32x4 v_src2 = v_round(v_load(src + x + cWidth));
v_int32x4 v_src3 = v_round(v_load(src + x + cWidth * 2));
v_int32x4 v_src4 = v_round(v_load(src + x + cWidth * 3));
v_uint16x8 v_dst1 = v_pack_u(v_src1, v_src2);
v_uint16x8 v_dst2 = v_pack_u(v_src3, v_src4);
v_store(dst + x, v_pack(v_dst1, v_dst2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<float, schar>
{
int operator() (const float * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_int32x4 v_src1 = v_round(v_load(src + x));
v_int32x4 v_src2 = v_round(v_load(src + x + cWidth));
v_int32x4 v_src3 = v_round(v_load(src + x + cWidth * 2));
v_int32x4 v_src4 = v_round(v_load(src + x + cWidth * 3));
v_int16x8 v_dst1 = v_pack(v_src1, v_src2);
v_int16x8 v_dst2 = v_pack(v_src3, v_src4);
v_store(dst + x, v_pack(v_dst1, v_dst2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<float, ushort>
{
int operator() (const float * src, ushort * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src1 = v_round(v_load(src + x));
v_int32x4 v_src2 = v_round(v_load(src + x + cWidth));
v_store(dst + x, v_pack_u(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<float, short>
{
int operator() (const float * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src1 = v_round(v_load(src + x));
v_int32x4 v_src2 = v_round(v_load(src + x + cWidth));
v_store(dst + x, v_pack(v_src1, v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<float, int>
{
int operator() (const float * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float32x4::nlanes;
for (; x <= width - cWidth; x += cWidth)
v_store(dst + x, v_round(v_load(src + x)));
}
return x;
}
};
#if CV_SIMD128_64F
// from double
template <>
struct Cvt_SIMD<double, uchar>
{
int operator() (const double * src, uchar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_float32x4 v_src0 = v_cvt_f32(v_load(src + x));
v_float32x4 v_src1 = v_cvt_f32(v_load(src + x + cWidth));
v_float32x4 v_src2 = v_cvt_f32(v_load(src + x + cWidth * 2));
v_float32x4 v_src3 = v_cvt_f32(v_load(src + x + cWidth * 3));
v_src0 = v_combine_low(v_src0, v_src1);
v_src1 = v_combine_low(v_src2, v_src3);
v_int16x8 v_dst = v_pack(v_round(v_src0), v_round(v_src1));
v_pack_u_store(dst + x, v_dst);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<double, schar>
{
int operator() (const double * src, schar * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_float32x4 v_src0 = v_cvt_f32(v_load(src + x));
v_float32x4 v_src1 = v_cvt_f32(v_load(src + x + cWidth));
v_float32x4 v_src2 = v_cvt_f32(v_load(src + x + cWidth * 2));
v_float32x4 v_src3 = v_cvt_f32(v_load(src + x + cWidth * 3));
v_src0 = v_combine_low(v_src0, v_src1);
v_src1 = v_combine_low(v_src2, v_src3);
v_int16x8 v_dst = v_pack(v_round(v_src0), v_round(v_src1));
v_store_low(dst + x, v_pack(v_dst, v_dst));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<double, ushort>
{
int operator() (const double * src, ushort * dst, int width) const
{
int x = 0;
#if CV_TRY_SSE4_1
if (CV_CPU_HAS_SUPPORT_SSE4_1)
return opt_SSE4_1::Cvt_SIMD_f64u16_SSE41(src, dst, width);
#endif
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_float32x4 v_src0 = v_cvt_f32(v_load(src + x));
v_float32x4 v_src1 = v_cvt_f32(v_load(src + x + cWidth));
v_float32x4 v_src2 = v_cvt_f32(v_load(src + x + cWidth * 2));
v_float32x4 v_src3 = v_cvt_f32(v_load(src + x + cWidth * 3));
v_src0 = v_combine_low(v_src0, v_src1);
v_src1 = v_combine_low(v_src2, v_src3);
v_uint16x8 v_dst = v_pack_u(v_round(v_src0), v_round(v_src1));
v_store(dst + x, v_dst);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<double, short>
{
int operator() (const double * src, short * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_float32x4 v_src0 = v_cvt_f32(v_load(src + x));
v_float32x4 v_src1 = v_cvt_f32(v_load(src + x + cWidth));
v_float32x4 v_src2 = v_cvt_f32(v_load(src + x + cWidth * 2));
v_float32x4 v_src3 = v_cvt_f32(v_load(src + x + cWidth * 3));
v_src0 = v_combine_low(v_src0, v_src1);
v_src1 = v_combine_low(v_src2, v_src3);
v_int16x8 v_dst = v_pack(v_round(v_src0), v_round(v_src1));
v_store(dst + x, v_dst);
}
}
return x;
}
};
template <>
struct Cvt_SIMD<double, int>
{
int operator() (const double * src, int * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src0 = v_round(v_load(src + x));
v_int32x4 v_src1 = v_round(v_load(src + x + cWidth));
v_store(dst + x, v_combine_low(v_src0, v_src1));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<double, float>
{
int operator() (const double * src, float * dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_float32x4 v_src0 = v_cvt_f32(v_load(src + x));
v_float32x4 v_src1 = v_cvt_f32(v_load(src + x + cWidth));
v_store(dst + x, v_combine_low(v_src0, v_src1));
}
}
return x;
}
};
// to double
template <>
struct Cvt_SIMD<uchar, double>
{
int operator() (const uchar* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_uint16x8 v_src = v_load_expand(src + x);
v_uint32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_cvt_f64(v_reinterpret_as_s32(v_src1)));
v_store(dst + x + cWidth, v_cvt_f64_high(v_reinterpret_as_s32(v_src1)));
v_store(dst + x + cWidth * 2, v_cvt_f64(v_reinterpret_as_s32(v_src2)));
v_store(dst + x + cWidth * 3, v_cvt_f64_high(v_reinterpret_as_s32(v_src2)));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<schar, double>
{
int operator() (const schar* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 4; x += cWidth * 4)
{
v_int16x8 v_src = v_load_expand(src + x);
v_int32x4 v_src1, v_src2;
v_expand(v_src, v_src1, v_src2);
v_store(dst + x, v_cvt_f64(v_src1));
v_store(dst + x + cWidth, v_cvt_f64_high(v_src1));
v_store(dst + x + cWidth * 2, v_cvt_f64(v_src2));
v_store(dst + x + cWidth * 3, v_cvt_f64_high(v_src2));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<ushort, double>
{
int operator() (const ushort* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_uint32x4 v_src = v_load_expand(src + x);
v_store(dst + x, v_cvt_f64(v_reinterpret_as_s32(v_src)));
v_store(dst + x + cWidth, v_cvt_f64_high(v_reinterpret_as_s32(v_src)));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<short, double>
{
int operator() (const short* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src = v_load_expand(src + x);
v_store(dst + x, v_cvt_f64(v_src));
v_store(dst + x + cWidth, v_cvt_f64_high(v_src));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<int, double>
{
int operator() (const int* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_int32x4 v_src = v_load(src + x);
v_store(dst + x, v_cvt_f64(v_src));
v_store(dst + x + cWidth, v_cvt_f64_high(v_src));
}
}
return x;
}
};
template <>
struct Cvt_SIMD<float, double>
{
int operator() (const float* src, double* dst, int width) const
{
int x = 0;
if (hasSIMD128())
{
int cWidth = v_float64x2::nlanes;
for (; x <= width - cWidth * 2; x += cWidth * 2)
{
v_float32x4 v_src = v_load(src + x);
v_store(dst + x, v_cvt_f64(v_src));
v_store(dst + x + cWidth, v_cvt_f64_high(v_src));
}
}
return x;
}
};
#endif // CV_SIMD128_64F
#endif // CV_SIMD128
#ifdef HAVE_OPENVX
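// Attempt the depth conversion with OpenVX (vxuConvertDepth, saturating policy).
// An empty region is treated as already done; returning false (image too small,
// conversion not supported by the implementation) makes the caller fall back to
// the CPU path.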
template<typename T, typename DT>
static bool _openvx_cvt(const T* src, size_t sstep,
DT* dst, size_t dstep, Size continuousSize)
{
using namespace ivx;
if(!(continuousSize.width > 0 && continuousSize.height > 0))
{
return true;
}
//.height is the number of continuous pieces
//.width is the length of one piece
Size imgSize = continuousSize;
if(continuousSize.height == 1)
{
if(sstep / sizeof(T) == dstep / sizeof(DT) && sstep / sizeof(T) > 0 &&
continuousSize.width % (sstep / sizeof(T)) == 0)
{
//continuous n-lines image
imgSize.width = sstep / sizeof(T);
imgSize.height = continuousSize.width / (sstep / sizeof(T));
}
else
{
//1-row image with possibly incorrect step
sstep = continuousSize.width * sizeof(T);
dstep = continuousSize.width * sizeof(DT);
}
}
int srcType = DataType<T>::type, dstType = DataType<DT>::type;
if (ovx::skipSmallImages<VX_KERNEL_CONVERTDEPTH>(imgSize.width, imgSize.height))
return false;
try
{
Context context = ovx::getOpenVXContext();
// Other conversions are marked as "experimental"
if(context.vendorID() == VX_ID_KHRONOS &&
!(srcType == CV_8U && dstType == CV_16S) &&
!(srcType == CV_16S && dstType == CV_8U))
{
return false;
}
Image srcImage = Image::createFromHandle(context, Image::matTypeToFormat(srcType),
Image::createAddressing(imgSize.width, imgSize.height,
(vx_uint32)sizeof(T), (vx_uint32)sstep),
(void*)src);
Image dstImage = Image::createFromHandle(context, Image::matTypeToFormat(dstType),
Image::createAddressing(imgSize.width, imgSize.height,
(vx_uint32)sizeof(DT), (vx_uint32)dstep),
(void*)dst);
IVX_CHECK_STATUS(vxuConvertDepth(context, srcImage, dstImage, VX_CONVERT_POLICY_SATURATE, 0));
#ifdef VX_VERSION_1_1
//we should take the user memory back before release
//(it's not done automatically according to the standard)
srcImage.swapHandle(); dstImage.swapHandle();
#endif
}
catch (RuntimeError & e)
{
VX_DbgThrow(e.what());
}
catch (WrapperError & e)
{
VX_DbgThrow(e.what());
}
return true;
}
template<typename T, typename DT>
static bool openvx_cvt(const T* src, size_t sstep,
DT* dst, size_t dstep, Size size)
{
(void)src; (void)sstep; (void)dst; (void)dstep; (void)size;
return false;
}
#define DEFINE_OVX_CVT_SPECIALIZATION(T, DT) \
template<> \
bool openvx_cvt(const T *src, size_t sstep, DT *dst, size_t dstep, Size size) \
{ \
return _openvx_cvt<T, DT>(src, sstep, dst, dstep, size); \
}
DEFINE_OVX_CVT_SPECIALIZATION(uchar, ushort)
DEFINE_OVX_CVT_SPECIALIZATION(uchar, short)
DEFINE_OVX_CVT_SPECIALIZATION(uchar, int)
DEFINE_OVX_CVT_SPECIALIZATION(ushort, uchar)
DEFINE_OVX_CVT_SPECIALIZATION(ushort, int)
DEFINE_OVX_CVT_SPECIALIZATION(short, uchar)
DEFINE_OVX_CVT_SPECIALIZATION(short, int)
DEFINE_OVX_CVT_SPECIALIZATION(int, uchar)
DEFINE_OVX_CVT_SPECIALIZATION(int, ushort)
DEFINE_OVX_CVT_SPECIALIZATION(int, short)
#endif
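// Generic per-element conversion: try OpenVX first (only the specialized type pairs
// actually run it), then the Cvt_SIMD functor for the vectorizable head of each row,
// and finally an (optionally unrolled) scalar tail using saturate_cast.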
template<typename T, typename DT> static void
cvt_( const T* src, size_t sstep,
DT* dst, size_t dstep, Size size )
{
CV_OVX_RUN(
true,
openvx_cvt(src, sstep, dst, dstep, size)
)
sstep /= sizeof(src[0]);
dstep /= sizeof(dst[0]);
Cvt_SIMD<T, DT> vop;
for( ; size.height--; src += sstep, dst += dstep )
{
int x = vop(src, dst, size.width);
#if CV_ENABLE_UNROLLED
for( ; x <= size.width - 4; x += 4 )
{
DT t0, t1;
t0 = saturate_cast<DT>(src[x]);
t1 = saturate_cast<DT>(src[x+1]);
dst[x] = t0; dst[x+1] = t1;
t0 = saturate_cast<DT>(src[x+2]);
t1 = saturate_cast<DT>(src[x+3]);
dst[x+2] = t0; dst[x+3] = t1;
}
#endif
for( ; x < size.width; x++ )
dst[x] = saturate_cast<DT>(src[x]);
}
}
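// Same-type "conversion" is a plain row-by-row memcpy.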
template<typename T> static void
cpy_( const T* src, size_t sstep, T* dst, size_t dstep, Size size )
{
sstep /= sizeof(src[0]);
dstep /= sizeof(dst[0]);
for( ; size.height--; src += sstep, dst += dstep )
memcpy(dst, src, size.width*sizeof(src[0]));
}
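// The DEF_CVT_FUNC* / DEF_CPY_FUNC macros wrap cvt_/cpy_ into functions with the common
// BinaryFunc signature. With IPP enabled, DEF_CVT_FUNC_F tries the corresponding
// ippiConvert primitive first and DEF_CVT_FUNC_F2 does the same with ippRndFinancial
// rounding; both fall back to cvt_ if the IPP call fails.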
#if defined(HAVE_IPP)
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#else
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
#endif
#define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CPY_FUNC(suffix, stype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
stype* dst, size_t dstep, Size size, double*) \
{ \
cpy_(src, sstep, dst, dstep, size); \
}
DEF_CPY_FUNC(8u, uchar)
DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
DEF_CVT_FUNC(64f8u, double, uchar)
DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
DEF_CVT_FUNC(64f8s, double, schar)
DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
DEF_CPY_FUNC(16u, ushort)
DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
DEF_CVT_FUNC(64f16u, double, ushort)
DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
DEF_CVT_FUNC(32f16s, float, short)
DEF_CVT_FUNC(64f16s, double, short)
DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
DEF_CPY_FUNC(32s, int)
DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
DEF_CVT_FUNC(64f32s, double, int)
DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
DEF_CVT_FUNC(64f32f, double, float)
DEF_CVT_FUNC(8u64f, uchar, double)
DEF_CVT_FUNC(8s64f, schar, double)
DEF_CVT_FUNC(16u64f, ushort, double)
DEF_CVT_FUNC(16s64f, short, double)
DEF_CVT_FUNC(32s64f, int, double)
DEF_CVT_FUNC(32f64f, float, double)
DEF_CPY_FUNC(64s, int64)
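// Conversion function table indexed by [destination depth][source depth]; the same-size
// diagonal entries reuse the plain copy functions (cvt8u, cvt16u, cvt32s, cvt64s).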
BinaryFunc getConvertFunc(int sdepth, int ddepth)
{
static BinaryFunc cvtTab[][8] =
{
{
(BinaryFunc)(cvt8u), (BinaryFunc)GET_OPTIMIZED(cvt8s8u), (BinaryFunc)GET_OPTIMIZED(cvt16u8u),
(BinaryFunc)GET_OPTIMIZED(cvt16s8u), (BinaryFunc)GET_OPTIMIZED(cvt32s8u), (BinaryFunc)GET_OPTIMIZED(cvt32f8u),
(BinaryFunc)GET_OPTIMIZED(cvt64f8u), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u8s), (BinaryFunc)cvt8u, (BinaryFunc)GET_OPTIMIZED(cvt16u8s),
(BinaryFunc)GET_OPTIMIZED(cvt16s8s), (BinaryFunc)GET_OPTIMIZED(cvt32s8s), (BinaryFunc)GET_OPTIMIZED(cvt32f8s),
(BinaryFunc)GET_OPTIMIZED(cvt64f8s), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u16u), (BinaryFunc)GET_OPTIMIZED(cvt8s16u), (BinaryFunc)cvt16u,
(BinaryFunc)GET_OPTIMIZED(cvt16s16u), (BinaryFunc)GET_OPTIMIZED(cvt32s16u), (BinaryFunc)GET_OPTIMIZED(cvt32f16u),
(BinaryFunc)GET_OPTIMIZED(cvt64f16u), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u16s), (BinaryFunc)GET_OPTIMIZED(cvt8s16s), (BinaryFunc)GET_OPTIMIZED(cvt16u16s),
(BinaryFunc)cvt16u, (BinaryFunc)GET_OPTIMIZED(cvt32s16s), (BinaryFunc)GET_OPTIMIZED(cvt32f16s),
(BinaryFunc)GET_OPTIMIZED(cvt64f16s), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u32s), (BinaryFunc)GET_OPTIMIZED(cvt8s32s), (BinaryFunc)GET_OPTIMIZED(cvt16u32s),
(BinaryFunc)GET_OPTIMIZED(cvt16s32s), (BinaryFunc)cvt32s, (BinaryFunc)GET_OPTIMIZED(cvt32f32s),
(BinaryFunc)GET_OPTIMIZED(cvt64f32s), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u32f), (BinaryFunc)GET_OPTIMIZED(cvt8s32f), (BinaryFunc)GET_OPTIMIZED(cvt16u32f),
(BinaryFunc)GET_OPTIMIZED(cvt16s32f), (BinaryFunc)GET_OPTIMIZED(cvt32s32f), (BinaryFunc)cvt32s,
(BinaryFunc)GET_OPTIMIZED(cvt64f32f), 0
},
{
(BinaryFunc)GET_OPTIMIZED(cvt8u64f), (BinaryFunc)GET_OPTIMIZED(cvt8s64f), (BinaryFunc)GET_OPTIMIZED(cvt16u64f),
(BinaryFunc)GET_OPTIMIZED(cvt16s64f), (BinaryFunc)GET_OPTIMIZED(cvt32s64f), (BinaryFunc)GET_OPTIMIZED(cvt32f64f),
(BinaryFunc)(cvt64s), 0
},
{
0, 0, 0, 0, 0, 0, 0, 0
}
};
return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
}
} // cv::
#ifdef HAVE_IPP
namespace cv
{
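// IPP IW implementation of convertTo: iwiScale performs the depth conversion together
// with the alpha/beta scaling, using the accurate hint for the lossy float/integer paths;
// matrices with more than two dimensions are processed plane by plane.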
static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
IppDataType srcDepth = ippiGetDataType(src.depth());
IppDataType dstDepth = ippiGetDataType(dst.depth());
int channels = src.channels();
if(src.dims == 0)
return false;
::ipp::IwiImage iwSrc;
::ipp::IwiImage iwDst;
try
{
IppHintAlgorithm mode = ippAlgHintFast;
if(dstDepth == ipp64f ||
(dstDepth == ipp32f && (srcDepth == ipp32s || srcDepth == ipp64f)) ||
(dstDepth == ipp32s && (srcDepth == ipp32s || srcDepth == ipp64f)))
mode = ippAlgHintAccurate;
if(src.dims <= 2)
{
Size sz = getContinuousSize(src, dst, channels);
iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
else
{
const Mat *arrays[] = {&src, &dst, NULL};
uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs);
iwSrc.Init(ippiSize(it.size, 1), srcDepth, channels);
iwDst.Init(ippiSize(it.size, 1), dstDepth, channels);
for(size_t i = 0; i < it.nplanes; i++, ++it)
{
iwSrc.m_ptr = ptrs[0];
iwDst.m_ptr = ptrs[1];
CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
}
}
catch (::ipp::IwException)
{
return false;
}
return true;
#else
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(alpha); CV_UNUSED(beta);
return false;
#endif
}
} // cv::
#endif
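// dst(x,y) = saturate_cast<dstT>(alpha*src(x,y) + beta); an empty source just releases
// the destination, and an identity conversion (same depth, no scaling) degenerates to copyTo().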
void cv::Mat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
{
CV_INSTRUMENT_REGION()
if( empty() )
{
_dst.release();
return;
}
bool noScale = fabs(alpha-1) < DBL_EPSILON && fabs(beta) < DBL_EPSILON;
if( _type < 0 )
_type = _dst.fixedType() ? _dst.type() : type();
else
_type = CV_MAKETYPE(CV_MAT_DEPTH(_type), channels());
int sdepth = depth(), ddepth = CV_MAT_DEPTH(_type);
if( sdepth == ddepth && noScale )
{
copyTo(_dst);
return;
}
Mat src = *this;
if( dims <= 2 )
_dst.create( size(), _type );
else
_dst.create( dims, size, _type );
Mat dst = _dst.getMat();
CV_IPP_RUN_FAST(ipp_convertTo(src, dst, alpha, beta ));
BinaryFunc func = noScale ? getConvertFunc(sdepth, ddepth) : getConvertScaleFunc(sdepth, ddepth);
double scale[] = {alpha, beta};
int cn = channels();
CV_Assert( func != 0 );
if( dims <= 2 )
{
Size sz = getContinuousSize(src, dst, cn);
func( src.data, src.step, 0, 0, dst.data, dst.step, sz, scale );
}
else
{
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
Size sz((int)(it.size*cn), 1);
for( size_t i = 0; i < it.nplanes; i++, ++it )
func(ptrs[0], 1, 0, 0, ptrs[1], 1, sz, scale);
}
}
//==================================================================================================
namespace cv {
// template for the FP16 <-> FP32 conversion function; dispatches to the HW FP16 path
// when available, otherwise falls back to the software converter
template<typename T, typename DT> static void
cvtScaleHalf_( const T* src, size_t sstep, DT* dst, size_t dstep, Size size);
template<> void
cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t dstep, Size size )
{
CV_CPU_CALL_FP16_(cvtScaleHalf_SIMD32f16f, (src, sstep, dst, dstep, size));
#if !CV_CPU_FORCE_FP16
sstep /= sizeof(src[0]);
dstep /= sizeof(dst[0]);
for( ; size.height--; src += sstep, dst += dstep )
{
for ( int x = 0; x < size.width; x++ )
{
dst[x] = convertFp16SW(src[x]);
}
}
#endif
}
template<> void
cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t dstep, Size size )
{
CV_CPU_CALL_FP16_(cvtScaleHalf_SIMD16f32f, (src, sstep, dst, dstep, size));
#if !CV_CPU_FORCE_FP16
sstep /= sizeof(src[0]);
dstep /= sizeof(dst[0]);
for( ; size.height--; src += sstep, dst += dstep )
{
for ( int x = 0; x < size.width; x++ )
{
dst[x] = convertFp16SW(src[x]);
}
}
#endif
}
#define DEF_CVT_SCALE_FP16_FUNC(suffix, stype, dtype) \
static void cvtScaleHalf##suffix( const stype* src, size_t sstep, \
dtype* dst, size_t dstep, Size size, void*) \
{ \
cvtScaleHalf_<stype,dtype>(src, sstep, dst, dstep, size); \
}
DEF_CVT_SCALE_FP16_FUNC(32f16f, float, short)
DEF_CVT_SCALE_FP16_FUNC(16f32f, short, float)
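// FP16 conversion table indexed by destination depth: CV_16S selects float->half packing,
// CV_32F selects half->float expansion.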
static UnaryFunc getConvertFuncFp16(int ddepth)
{
static UnaryFunc cvtTab[] =
{
0, 0, 0,
(UnaryFunc)(cvtScaleHalf32f16f), 0, (UnaryFunc)(cvtScaleHalf16f32f),
0, 0,
};
return cvtTab[CV_MAT_DEPTH(ddepth)];
}
#ifdef HAVE_OPENCL
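// OpenCL path of convertFp16: builds the "convertFp16" kernel from halfconvert_oclsrc,
// defining FLOAT_TO_HALF for the float->half direction.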
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int ddepth )
{
int type = _src.type(), cn = CV_MAT_CN(type);
_dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );
int kercn = 1;
int rowsPerWI = 1;
String build_opt = format("-D HALF_SUPPORT -D dstT=%s -D srcT=%s -D rowsPerWI=%d%s",
ddepth == CV_16S ? "half" : "float",
ddepth == CV_16S ? "float" : "half",
rowsPerWI,
ddepth == CV_16S ? " -D FLOAT_TO_HALF " : "");
ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
if (k.empty())
return false;
UMat src = _src.getUMat();
UMat dst = _dst.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
k.args(srcarg, dstarg);
size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
#endif
} //cv::
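// Converts between CV_32F and CV_16S (IEEE 754 half values stored in a 16-bit buffer).
// When the source has at most two dimensions and the destination is a UMat, the OpenCL
// kernel is used; otherwise the row-wise converter selected by getConvertFuncFp16()
// runs on the CPU.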
void cv::convertFp16( InputArray _src, OutputArray _dst)
{
CV_INSTRUMENT_REGION()
int ddepth = 0;
switch( _src.depth() )
{
case CV_32F:
ddepth = CV_16S;
break;
case CV_16S:
ddepth = CV_32F;
break;
default:
CV_Error(Error::StsUnsupportedFormat, "Unsupported input depth");
return;
}
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
ocl_convertFp16(_src, _dst, ddepth))
Mat src = _src.getMat();
int type = CV_MAKETYPE(ddepth, src.channels());
_dst.create( src.dims, src.size, type );
Mat dst = _dst.getMat();
UnaryFunc func = getConvertFuncFp16(ddepth);
int cn = src.channels();
CV_Assert( func != 0 );
if( src.dims <= 2 )
{
Size sz = getContinuousSize(src, dst, cn);
func( src.data, src.step, dst.data, dst.step, sz, 0);
}
else
{
const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
Size sz((int)(it.size*cn), 1);
for( size_t i = 0; i < it.nplanes; i++, ++it )
func(ptrs[0], 1, ptrs[1], 1, sz, 0);
}
}